From 967465da2975fe4322080703ce5a77ea90752829 Mon Sep 17 00:00:00 2001
From: Lynn Boger
Date: Mon, 31 Aug 2020 09:43:40 -0400
Subject: cmd/compile: use combined shifts to improve array addressing on ppc64x
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change adds rules to find pairs of instructions that can be
combined into a single shift. These instruction sequences are common
in array addressing within loops. Improvements can be seen in many
crypto packages and the hash packages.

These are based on the extended mnemonics found in the ISA sections
C.8.1 and C.8.2.

Some rules in PPC64.rules were moved because the ordering prevented
some matching.

The following results were generated on power9.

hash/crc32:
CRC32/poly=Koopman/size=40/align=0     195ns ± 0%   163ns ± 0%   -16.41%
CRC32/poly=Koopman/size=40/align=1     200ns ± 0%   163ns ± 0%   -18.50%
CRC32/poly=Koopman/size=512/align=0    1.98µs ± 0%  1.67µs ± 0%  -15.46%
CRC32/poly=Koopman/size=512/align=1    1.98µs ± 0%  1.69µs ± 0%  -14.80%
CRC32/poly=Koopman/size=1kB/align=0    3.90µs ± 0%  3.31µs ± 0%  -15.27%
CRC32/poly=Koopman/size=1kB/align=1    3.85µs ± 0%  3.31µs ± 0%  -14.15%
CRC32/poly=Koopman/size=4kB/align=0    15.3µs ± 0%  13.1µs ± 0%  -14.22%
CRC32/poly=Koopman/size=4kB/align=1    15.4µs ± 0%  13.1µs ± 0%  -14.79%
CRC32/poly=Koopman/size=32kB/align=0   137µs ± 0%   105µs ± 0%   -23.56%
CRC32/poly=Koopman/size=32kB/align=1   137µs ± 0%   105µs ± 0%   -23.53%

crypto/rc4:
RC4_128   733ns ± 0%   650ns ± 0%   -11.32%  (p=1.000 n=1+1)
RC4_1K    5.80µs ± 0%  5.17µs ± 0%  -10.89%  (p=1.000 n=1+1)
RC4_8K    45.7µs ± 0%  40.8µs ± 0%  -10.73%  (p=1.000 n=1+1)

crypto/sha1:
Hash8Bytes     635ns ± 0%   613ns ± 0%   -3.46%  (p=1.000 n=1+1)
Hash320Bytes   2.30µs ± 0%  2.18µs ± 0%  -5.38%  (p=1.000 n=1+1)
Hash1K         5.88µs ± 0%  5.38µs ± 0%  -8.62%  (p=1.000 n=1+1)
Hash8K         42.0µs ± 0%  37.9µs ± 0%  -9.75%  (p=1.000 n=1+1)

There are other improvements found in golang.org/x/crypto which are
all in the range of 5-15%.
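To make the pattern concrete, a minimal Go sketch modeled on the
checkUnneededTrunc test added below (illustration only; the lookup
helper is hypothetical, not part of the change):

    // Hypothetical example. Indexing a *[100000]uint32 scales the
    // index by 4 (a left shift by 2), and byte(v)^b zero-extends the
    // index (a mask). Previously the zero-extend (RLWINM/RLDICR) and
    // the shift were emitted separately; with these rules the pair
    // becomes a single CLRLSLDI.
    func lookup(tab *[100000]uint32, v uint32, b byte) uint32 {
        return tab[byte(v)^b]
    }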
Change-Id: I193471fbcf674151ffe2edab212799d9b08dfb8c Reviewed-on: https://go-review.googlesource.com/c/go/+/252097 Trust: Lynn Boger Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Carlos Eduardo Seo --- test/codegen/shift.go | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'test/codegen/shift.go') diff --git a/test/codegen/shift.go b/test/codegen/shift.go index 5e50ea6bff..32214851b5 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -150,6 +150,61 @@ func lshGuarded64(v int64, s uint) int64 { panic("shift too large") } +func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) { + + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f := tab[byte(v)^b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[byte(v)&b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[byte(v)|b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)&h] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)^h] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)|h] + // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" + // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" + f += tab[v&0xff] + // ppc64le:-".*AND",".*CLRLSLWI" + // ppc64:-".*AND",".*CLRLSLWI" + f += 2*uint32(uint16(d)) + // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" + // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" + g := 2*uint64(uint32(d)) + return f, g +} + +func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, uint16, uint32, uint64) { + + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + f := (v8 &0xF) << 2 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + f += byte(v16)<<3 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + g := (v16 & 0xFF) << 3 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + h := (v32 & 0xFFFFF) << 2 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + h += uint32(v64)<<4 + // ppc64le:-"AND","CLRLSLDI" + // ppc64:-"AND","CLRLSLDI" + i := (v64 & 0xFFFFFFFF) << 5 + return f, g, h, i +} + func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { // ppc64le:-".*MOVW" -- cgit v1.3 From a424f6e45e29960c933a7ccc1cd8fc9bb2766f15 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Wed, 23 Sep 2020 11:06:39 -0400 Subject: cmd/asm,cmd/compile,cmd/internal/obj/ppc64: add extswsli support on power9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for the extswsli instruction which combines extsw followed by a shift. 
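The shape of code that benefits, mirroring the BenchmarkExtShift loop
added below (a sketch; the extShift helper is hypothetical):

    // Hypothetical example. int64(x+32) sign-extends a 32-bit value
    // (extsw) and the *8 is a left shift by 3 (sldi); on power9 the
    // two are folded into a single extswsli.
    func extShift(x int32) int64 {
        return int64(x+32) * 8
    }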
New benchmark demonstrates the improvement: name old time/op new time/op delta ExtShift 1.34µs ± 0% 1.30µs ± 0% -3.15% (p=0.057 n=4+3) Change-Id: I21b410676fdf15d20e0cbbaa75d7c6dcd3bbb7b0 Reviewed-on: https://go-review.googlesource.com/c/go/+/257017 Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Carlos Eduardo Seo Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 1 + src/cmd/compile/internal/gc/bench_test.go | 12 ++++ src/cmd/compile/internal/ppc64/ssa.go | 2 +- src/cmd/compile/internal/ssa/gen/PPC64.rules | 2 + src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 1 + src/cmd/compile/internal/ssa/opGen.go | 15 ++++ src/cmd/compile/internal/ssa/rewritePPC64.go | 36 ++++++++++ src/cmd/internal/obj/ppc64/a.out.go | 2 + src/cmd/internal/obj/ppc64/anames.go | 2 + src/cmd/internal/obj/ppc64/asm9.go | 104 +++++++++++++++++---------- test/codegen/shift.go | 7 +- 11 files changed, 142 insertions(+), 42 deletions(-) (limited to 'test/codegen/shift.go') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index e26f6f8933..88a7609ba8 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -266,6 +266,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 SRDCC R3, R4 // 7c841c37 ROTLW $16, R3, R4 // 5464803e ROTLW R3, R4, R5 // 5c85183e + EXTSWSLI $3, R4, R5 // 7c851ef4 RLWMI $7, R3, $65535, R6 // 50663c3e RLWMICC $7, R3, $65535, R6 // 50663c3f RLWNM $3, R4, $7, R6 // 54861f7e diff --git a/src/cmd/compile/internal/gc/bench_test.go b/src/cmd/compile/internal/gc/bench_test.go index 09aaf428c3..a2887f2f7b 100644 --- a/src/cmd/compile/internal/gc/bench_test.go +++ b/src/cmd/compile/internal/gc/bench_test.go @@ -20,6 +20,18 @@ func BenchmarkLoadAdd(b *testing.B) { } } +// Added for ppc64 extswsli on power9 +func BenchmarkExtShift(b *testing.B) { + x := make([]int32, 1024) + for i := 0; i < b.N; i++ { + var s int64 + for i := range x { + s ^= int64(x[i]+32) * 8 + } + globl = s + } +} + func BenchmarkModify(b *testing.B) { a := make([]int64, 1024) v := globl diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index 4a83a0bdd7..a5fbdaffba 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -677,7 +677,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.From.Reg = v.Args[0].Reg() case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, - ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst: + ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst: p := s.Prog(v.Op.Asm()) p.Reg = v.Args[0].Reg() p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 774d5096de..de30d003e6 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -1025,6 +1025,8 @@ (SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) (SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) (SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +// special 
case for power9 +(SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && objabi.GOPPC64 >= 9 => (EXTSWSLconst [c] x) // Lose widening ops fed to stores (MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) => (MOVBstore [off] {sym} ptr x mem) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index ed99c40cd2..28317928a8 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -223,6 +223,7 @@ func init() { {name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"}, // arg0 rotate left by auxInt bits {name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits + {name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"}, {name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros {name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 1fc0f7ea79..1fe00c7026 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1865,6 +1865,7 @@ const ( OpPPC64SLWconst OpPPC64ROTLconst OpPPC64ROTLWconst + OpPPC64EXTSWSLconst OpPPC64CNTLZD OpPPC64CNTLZW OpPPC64CNTTZD @@ -24849,6 +24850,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "EXTSWSLconst", + auxType: auxInt64, + argLen: 1, + asm: ppc64.AEXTSWSLI, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, { name: "CNTLZD", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 12b08824b5..29ec3992f2 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -12877,6 +12877,24 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } break } + // match: (SLDconst [c] z:(MOVWreg x)) + // cond: c < 32 && objabi.GOPPC64 >= 9 + // result: (EXTSWSLconst [c] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWreg { + break + } + x := z.Args[0] + if !(c < 32 && objabi.GOPPC64 >= 9) { + break + } + v.reset(OpPPC64EXTSWSLconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } return false } func rewriteValuePPC64_OpPPC64SLW(v *Value) bool { @@ -13000,6 +13018,24 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } break } + // match: (SLWconst [c] z:(MOVWreg x)) + // cond: c < 32 && objabi.GOPPC64 >= 9 + // result: (EXTSWSLconst [c] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWreg { + break + } + x := z.Args[0] + if !(c < 32 && objabi.GOPPC64 >= 9) { + break + } + v.reset(OpPPC64EXTSWSLconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } return false } func rewriteValuePPC64_OpPPC64SRAD(v *Value) bool { diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index f438803fb5..4c97302f83 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -733,6 +733,8 @@ const ( ASRAD ASRADCC ASRDCC + AEXTSWSLI + AEXTSWSLICC ASTDCCC ATD diff --git a/src/cmd/internal/obj/ppc64/anames.go 
b/src/cmd/internal/obj/ppc64/anames.go index accd87fe00..fca4b3e355 100644 --- a/src/cmd/internal/obj/ppc64/anames.go +++ b/src/cmd/internal/obj/ppc64/anames.go @@ -329,6 +329,8 @@ var Anames = []string{ "SRAD", "SRADCC", "SRDCC", + "EXTSWSLI", + "EXTSWSLICC", "STDCCC", "TD", "DWORD", diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 60dda72507..9f06bdf8b3 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -160,6 +160,8 @@ var optab = []Optab{ {ASLD, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0}, {ASLD, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0}, {ASLD, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0}, + {AEXTSWSLI, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0}, + {AEXTSWSLI, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0}, {ASLW, C_SCON, C_REG, C_NONE, C_REG, 57, 4, 0}, {ASLW, C_SCON, C_NONE, C_NONE, C_REG, 57, 4, 0}, {ASRAW, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0}, @@ -1877,6 +1879,9 @@ func buildop(ctxt *obj.Link) { case ASRAW: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */ opset(ASRAWCC, r0) + case AEXTSWSLI: + opset(AEXTSWSLICC, r0) + case ASRAD: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */ opset(ASRADCC, r0) @@ -2189,49 +2194,54 @@ func AOP_RLDIC(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 { return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 | (m&31)<<6 | ((m&32)>>5)<<5 } +func AOP_EXTSWSLI(op uint32, a uint32, s uint32, sh uint32) uint32 { + return op | (a&31)<<21 | (s&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 +} + func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 { return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6 } const ( /* each rhs is OPVCC(_, _, _, _) */ - OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0 - OP_ADDI = 14<<26 | 0<<1 | 0<<10 | 0 - OP_ADDIS = 15<<26 | 0<<1 | 0<<10 | 0 - OP_ANDI = 28<<26 | 0<<1 | 0<<10 | 0 - OP_EXTSB = 31<<26 | 954<<1 | 0<<10 | 0 - OP_EXTSH = 31<<26 | 922<<1 | 0<<10 | 0 - OP_EXTSW = 31<<26 | 986<<1 | 0<<10 | 0 - OP_ISEL = 31<<26 | 15<<1 | 0<<10 | 0 - OP_MCRF = 19<<26 | 0<<1 | 0<<10 | 0 - OP_MCRFS = 63<<26 | 64<<1 | 0<<10 | 0 - OP_MCRXR = 31<<26 | 512<<1 | 0<<10 | 0 - OP_MFCR = 31<<26 | 19<<1 | 0<<10 | 0 - OP_MFFS = 63<<26 | 583<<1 | 0<<10 | 0 - OP_MFMSR = 31<<26 | 83<<1 | 0<<10 | 0 - OP_MFSPR = 31<<26 | 339<<1 | 0<<10 | 0 - OP_MFSR = 31<<26 | 595<<1 | 0<<10 | 0 - OP_MFSRIN = 31<<26 | 659<<1 | 0<<10 | 0 - OP_MTCRF = 31<<26 | 144<<1 | 0<<10 | 0 - OP_MTFSF = 63<<26 | 711<<1 | 0<<10 | 0 - OP_MTFSFI = 63<<26 | 134<<1 | 0<<10 | 0 - OP_MTMSR = 31<<26 | 146<<1 | 0<<10 | 0 - OP_MTMSRD = 31<<26 | 178<<1 | 0<<10 | 0 - OP_MTSPR = 31<<26 | 467<<1 | 0<<10 | 0 - OP_MTSR = 31<<26 | 210<<1 | 0<<10 | 0 - OP_MTSRIN = 31<<26 | 242<<1 | 0<<10 | 0 - OP_MULLW = 31<<26 | 235<<1 | 0<<10 | 0 - OP_MULLD = 31<<26 | 233<<1 | 0<<10 | 0 - OP_OR = 31<<26 | 444<<1 | 0<<10 | 0 - OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0 - OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0 - OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0 - OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0 - OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0 - OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0 - OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0 - OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0 - OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0 + OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0 + OP_ADDI = 14<<26 | 0<<1 | 0<<10 | 0 + OP_ADDIS = 15<<26 | 0<<1 | 0<<10 | 0 + OP_ANDI = 28<<26 | 0<<1 | 0<<10 | 0 + OP_EXTSB = 31<<26 | 954<<1 | 0<<10 | 0 + OP_EXTSH = 31<<26 | 922<<1 | 0<<10 | 0 + OP_EXTSW = 31<<26 | 986<<1 | 0<<10 | 0 + OP_ISEL = 31<<26 | 15<<1 | 0<<10 | 0 + OP_MCRF = 19<<26 | 0<<1 | 0<<10 | 0 + OP_MCRFS = 63<<26 | 64<<1 | 
0<<10 | 0 + OP_MCRXR = 31<<26 | 512<<1 | 0<<10 | 0 + OP_MFCR = 31<<26 | 19<<1 | 0<<10 | 0 + OP_MFFS = 63<<26 | 583<<1 | 0<<10 | 0 + OP_MFMSR = 31<<26 | 83<<1 | 0<<10 | 0 + OP_MFSPR = 31<<26 | 339<<1 | 0<<10 | 0 + OP_MFSR = 31<<26 | 595<<1 | 0<<10 | 0 + OP_MFSRIN = 31<<26 | 659<<1 | 0<<10 | 0 + OP_MTCRF = 31<<26 | 144<<1 | 0<<10 | 0 + OP_MTFSF = 63<<26 | 711<<1 | 0<<10 | 0 + OP_MTFSFI = 63<<26 | 134<<1 | 0<<10 | 0 + OP_MTMSR = 31<<26 | 146<<1 | 0<<10 | 0 + OP_MTMSRD = 31<<26 | 178<<1 | 0<<10 | 0 + OP_MTSPR = 31<<26 | 467<<1 | 0<<10 | 0 + OP_MTSR = 31<<26 | 210<<1 | 0<<10 | 0 + OP_MTSRIN = 31<<26 | 242<<1 | 0<<10 | 0 + OP_MULLW = 31<<26 | 235<<1 | 0<<10 | 0 + OP_MULLD = 31<<26 | 233<<1 | 0<<10 | 0 + OP_OR = 31<<26 | 444<<1 | 0<<10 | 0 + OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0 + OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0 + OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0 + OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0 + OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0 + OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0 + OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0 + OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0 + OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0 + OP_EXTSWSLI = 31<<26 | 445<<2 ) func oclass(a *obj.Addr) int { @@ -2965,14 +2975,21 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case AROTL: a = int(0) op = OP_RLDICL + case AEXTSWSLI: + a = int(v) default: c.ctxt.Diag("unexpected op in sldi case\n%v", p) a = 0 o1 = 0 } - o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a)) - if p.As == ASLDCC || p.As == ASRDCC { + if p.As == AEXTSWSLI || p.As == AEXTSWSLICC { + o1 = AOP_EXTSWSLI(OP_EXTSWSLI, uint32(r), uint32(p.To.Reg), uint32(v)) + + } else { + o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a)) + } + if p.As == ASLDCC || p.As == ASRDCC || p.As == AEXTSWSLICC { o1 |= 1 // Set the condition code bit } @@ -4350,6 +4367,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 { case ASRADCC: return OPVCC(31, 794, 0, 1) + case AEXTSWSLI: + return OPVCC(31, 445, 0, 0) + case AEXTSWSLICC: + return OPVCC(31, 445, 0, 1) + case ASRW: return OPVCC(31, 536, 0, 0) case ASRWCC: @@ -5013,6 +5035,10 @@ func (c *ctxt9) opirr(a obj.As) uint32 { return OPVCC(31, (413 << 1), 0, 0) case ASRADCC: return OPVCC(31, (413 << 1), 0, 1) + case AEXTSWSLI: + return OPVCC(31, 445, 0, 0) + case AEXTSWSLICC: + return OPVCC(31, 445, 0, 1) case ASTSW: return OPVCC(31, 725, 0, 0) diff --git a/test/codegen/shift.go b/test/codegen/shift.go index 32214851b5..abc4b091c9 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -182,7 +182,7 @@ func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byt return f, g } -func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, uint16, uint32, uint64) { +func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) { // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" @@ -202,7 +202,10 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, u // ppc64le:-"AND","CLRLSLDI" // ppc64:-"AND","CLRLSLDI" i := (v64 & 0xFFFFFFFF) << 5 - return f, g, h, i + // ppc64le/power9:-"SLD","EXTSWSLI" + // ppc64/power9:-"SLD","EXTSWSLI" + j := int64(x32+32)*8 + return f, g, h, i, j } func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { -- cgit v1.3 From cc2a5cf4b8b0aeaccd3dd439f8d3d68f25eef358 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Mon, 28 Sep 2020 18:20:12 -0400 Subject: cmd/compile,cmd/internal/obj/ppc64: fix some shift rules due to a regression A recent change to improve 
shifts was generating some invalid cases when the rule was based on an AND. The extended mnemonics CLRLSLDI and CLRLSLWI only allow certain values for the operands and in the mask case those values were not being checked properly. This adds a check to those rules to verify that the 'b' and 'n' values used when an AND was part of the rule have correct values. There was a bug in some diag messages in asm9. The message expected 3 values but only provided 2. Those are corrected here also. The test/codegen/shift.go was updated to add a few more cases to check for the case mentioned here. Some of the comments that mention the order of operands in these extended mnemonics were wrong and those have been corrected. Fixes #41683. Change-Id: If5bb860acaa5051b9e0cd80784b2868b85898c31 Reviewed-on: https://go-review.googlesource.com/c/go/+/258138 Run-TryBot: Lynn Boger Reviewed-by: Paul Murphy Reviewed-by: Carlos Eduardo Seo TryBot-Result: Go Bot Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 4 ++-- src/cmd/compile/internal/ppc64/ssa.go | 12 +++++----- src/cmd/compile/internal/ssa/gen/PPC64.rules | 9 ++++--- src/cmd/compile/internal/ssa/rewrite.go | 4 ++-- src/cmd/compile/internal/ssa/rewritePPC64.go | 34 +++++++-------------------- src/cmd/internal/obj/ppc64/asm9.go | 35 ++++++++++++++-------------- test/codegen/shift.go | 17 ++++++++------ 7 files changed, 50 insertions(+), 65 deletions(-) (limited to 'test/codegen/shift.go') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index 88a7609ba8..869f8c2d4f 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -287,8 +287,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 RLDICRCC $0, R4, $15, R6 // 788603c5 RLDIC $0, R4, $15, R6 // 788603c8 RLDICCC $0, R4, $15, R6 // 788603c9 - CLRLSLWI $16, R5, $8, R4 // 54a4861e - CLRLSLDI $2, R4, $24, R3 // 78831588 + CLRLSLWI $8, R5, $6, R4 // 54a430b2 + CLRLSLDI $24, R4, $4, R3 // 78832508 BEQ 0(PC) // 41820000 BGE 0(PC) // 40800000 diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index a5fbdaffba..d83b2df379 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -570,9 +570,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { r1 := v.Args[0].Reg() shifts := v.AuxInt p := s.Prog(v.Op.Asm()) - // clrlslwi ra,rs,sh,mb will become rlwinm ra,rs,sh,mb-sh,31-n as described in ISA - p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} - p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}) p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r @@ -582,9 +582,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { r1 := v.Args[0].Reg() shifts := v.AuxInt p := s.Prog(v.Op.Asm()) - // clrlsldi ra,rs,sh,mb will become rldic ra,rs,sh,mb-sh - p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} - p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}) p.Reg = r1 p.To.Type 
= obj.TYPE_REG p.To.Reg = r diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index de30d003e6..83ee4c499b 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -1018,13 +1018,12 @@ (SLDconst [c] z:(MOVHZreg x)) && c < 16 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x) (SLDconst [c] z:(MOVWZreg x)) && c < 32 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x) -(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) -(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) (SLWconst [c] z:(MOVBZreg x)) && z.Uses == 1 && c < 8 => (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x) (SLWconst [c] z:(MOVHZreg x)) && z.Uses == 1 && c < 16 => (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x) -(SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) -(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) -(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) // special case for power9 (SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && objabi.GOPPC64 >= 9 => (EXTSWSLconst [c] x) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 9f4de83a77..5d8b3ddc4e 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1380,8 +1380,8 @@ func GetPPC64Shiftme(auxint int64) int64 { return int64(int8(auxint)) } -// Catch the simple ones first -// TODO: Later catch more cases +// This verifies that the mask occupies the +// rightmost bits. 
func isPPC64ValidShiftMask(v int64) bool { if ((v + 1) & v) == 0 { return true diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 29ec3992f2..9822637b05 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -12831,7 +12831,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { return true } // match: (SLDconst [c] z:(ANDconst [d] x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d)) // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12841,7 +12841,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } d := auxIntToInt64(z.AuxInt) x := z.Args[0] - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d))) { break } v.reset(OpPPC64CLRLSLDI) @@ -12850,7 +12850,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { return true } // match: (SLDconst [c] z:(AND (MOVDconst [d]) x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(64-getPPC64ShiftMaskLength(d)) // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12867,7 +12867,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } d := auxIntToInt64(z_0.AuxInt) x := z_1 - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d))) { continue } v.reset(OpPPC64CLRLSLDI) @@ -12953,26 +12953,8 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { v.AddArg(x) return true } - // match: (SLWconst [c] z:(MOVWZreg x)) - // cond: z.Uses == 1 && c < 24 - // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) - for { - c := auxIntToInt64(v.AuxInt) - z := v_0 - if z.Op != OpPPC64MOVWZreg { - break - } - x := z.Args[0] - if !(z.Uses == 1 && c < 24) { - break - } - v.reset(OpPPC64CLRLSLWI) - v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 8, 31, 32)) - v.AddArg(x) - return true - } // match: (SLWconst [c] z:(ANDconst [d] x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12982,7 +12964,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } d := auxIntToInt64(z.AuxInt) x := z.Args[0] - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (32-getPPC64ShiftMaskLength(d))) { break } v.reset(OpPPC64CLRLSLWI) @@ -12991,7 +12973,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { return true } // match: (SLWconst [c] z:(AND (MOVDconst [d]) x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) for { c := auxIntToInt64(v.AuxInt) @@ -13008,7 +12990,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } d := auxIntToInt64(z_0.AuxInt) x := z_1 - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (32-getPPC64ShiftMaskLength(d))) { continue } v.reset(OpPPC64CLRLSLWI) diff 
--git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 9f06bdf8b3..928e299f43 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -2749,7 +2749,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { me := int(d) sh := c.regoff(&p.From) if me < 0 || me > 63 || sh > 63 { - c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh) + c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh, p) } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me)) @@ -2757,19 +2757,19 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { mb := int(d) sh := c.regoff(&p.From) if mb < 0 || mb > 63 || sh > 63 { - c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh) + c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh, p) } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb)) case ACLRLSLDI: // This is an extended mnemonic defined in the ISA section C.8.1 - // clrlsldi ra,rs,n,b --> rldic ra,rs,n,b-n + // clrlsldi ra,rs,b,n --> rldic ra,rs,n,b-n // It maps onto RLDIC so is directly generated here based on the operands from // the clrlsldi. - b := int(d) - n := c.regoff(&p.From) - if n > int32(b) || b > 63 { - c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b) + n := int32(d) + b := c.regoff(&p.From) + if n > b || b > 63 { + c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b, p) } o1 = AOP_RLDIC(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n)) @@ -3395,14 +3395,15 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { v := c.regoff(&p.From) switch p.As { case ACLRLSLWI: - b := c.regoff(p.GetFrom3()) + n := c.regoff(p.GetFrom3()) // This is an extended mnemonic described in the ISA C.8.2 - // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n // It maps onto rlwinm which is directly generated here. - if v < 0 || v > 32 || b > 32 { - c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + if n > v || v >= 32 { + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) } - o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n)) default: var mask [2]uint8 c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) @@ -3414,16 +3415,16 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { v := c.regoff(&p.From) switch p.As { case ACLRLSLWI: - b := c.regoff(p.GetFrom3()) - if v > b || b > 32 { + n := c.regoff(p.GetFrom3()) + if n > v || v >= 32 { // Message will match operands from the ISA even though in the // code it uses 'v' - c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) } // This is an extended mnemonic described in the ISA C.8.2 - // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n // It generates the rlwinm directly here. 
-		o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v))
+		o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n))
 	default:
 		var mask [2]uint8
 		c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3())))
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index abc4b091c9..bbfc85ffbb 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -187,8 +187,8 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64
 	// ppc64le:-"AND","CLRLSLWI"
 	// ppc64:-"AND","CLRLSLWI"
 	f := (v8 &0xF) << 2
-	// ppc64le:-"AND","CLRLSLWI"
-	// ppc64:-"AND","CLRLSLWI"
+	// ppc64le:"CLRLSLWI"
+	// ppc64:"CLRLSLWI"
 	f += byte(v16)<<3
 	// ppc64le:-"AND","CLRLSLWI"
 	// ppc64:-"AND","CLRLSLWI"
@@ -196,12 +196,15 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64
 	// ppc64le:-"AND","CLRLSLWI"
 	// ppc64:-"AND","CLRLSLWI"
 	h := (v32 & 0xFFFFF) << 2
-	// ppc64le:-"AND","CLRLSLWI"
-	// ppc64:-"AND","CLRLSLWI"
-	h += uint32(v64)<<4
-	// ppc64le:-"AND","CLRLSLDI"
-	// ppc64:-"AND","CLRLSLDI"
+	// ppc64le:"CLRLSLDI"
+	// ppc64:"CLRLSLDI"
 	i := (v64 & 0xFFFFFFFF) << 5
+	// ppc64le:-"CLRLSLDI"
+	// ppc64:-"CLRLSLDI"
+	i += (v64 & 0xFFFFFFF) << 38
+	// ppc64le/power9:-"CLRLSLDI"
+	// ppc64/power9:-"CLRLSLDI"
+	i += (v64 & 0xFFFF00) << 10
 	// ppc64le/power9:-"SLD","EXTSWSLI"
 	// ppc64/power9:-"SLD","EXTSWSLI"
 	j := int64(x32+32)*8
--
cgit v1.3

From c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24 Mon Sep 17 00:00:00 2001
From: "Paul E. Murphy"
Date: Fri, 23 Oct 2020 12:12:34 -0500
Subject: cmd/compile: combine more 32 bit shift and mask operations on ppc64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Combine (AND m (SRWconst x)) or (SRWconst (AND m x)) when the mask m
and the shift value produce a constant which can be encoded into an
RLWINM instruction.

Combine (CLRLSLDI (SRWconst x)) if combining the underlying rotate
masks produces a constant which can be encoded into RLWINM. Likewise
for (SLDconst (SRWconst x)) and (CLRLSLDI (RLWINM x)).

Combine rotate word + and operations which can be encoded as a single
RLWINM/RLWNM instruction.
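For illustration, a sketch of the shift-and-mask reduction the new
rules call out (e.g. b[(x>>14)&0xFF]); the tableIdx helper is
hypothetical:

    // Hypothetical example. (x >> 14) & 0xFF is a single
    // rotate-and-mask, so the SRWconst and the AND, plus the CLRLSLDI
    // from scaling the table index by 4, collapse into one RLWINM.
    func tableIdx(tab *[256]uint32, x uint32) uint32 {
        return tab[(x>>14)&0xFF]
    }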
The most notable performance improvements arise from the crypto benchmarks below (GOARCH=power8 on a ppc64le/linux): pkg:golang.org/x/crypto/blowfish goos:linux goarch:ppc64le ExpandKeyWithSalt 52.2µs ± 0% 47.5µs ± 0% -8.88% ExpandKey 44.4µs ± 0% 40.3µs ± 0% -9.15% pkg:golang.org/x/crypto/ssh/internal/bcrypt_pbkdf goos:linux goarch:ppc64le Key 57.6ms ± 0% 52.3ms ± 0% -9.13% pkg:golang.org/x/crypto/bcrypt goos:linux goarch:ppc64le Equal 90.9ms ± 0% 82.6ms ± 0% -9.13% DefaultCost 91.0ms ± 0% 82.7ms ± 0% -9.12% Change-Id: I59a0ca29face38f4ab46e37124c32906f216c4ce Reviewed-on: https://go-review.googlesource.com/c/go/+/260798 Run-TryBot: Carlos Eduardo Seo TryBot-Result: Go Bot Reviewed-by: Lynn Boger Reviewed-by: Carlos Eduardo Seo Trust: Lynn Boger --- src/cmd/compile/internal/ppc64/ssa.go | 18 ++ src/cmd/compile/internal/ssa/gen/PPC64.rules | 25 ++ src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 5 + src/cmd/compile/internal/ssa/opGen.go | 48 ++++ src/cmd/compile/internal/ssa/rewrite.go | 137 ++++++++++ src/cmd/compile/internal/ssa/rewritePPC64.go | 368 +++++++++++++++++++++++++++ src/cmd/compile/internal/ssa/rewrite_test.go | 181 +++++++++++++ test/codegen/rotate.go | 45 ++++ test/codegen/shift.go | 94 +++++-- 9 files changed, 900 insertions(+), 21 deletions(-) (limited to 'test/codegen/shift.go') diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index 79f18bfebb..3888aa6527 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -649,6 +649,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() + // Auxint holds encoded rotate + mask + case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI: + rot, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt) + p := s.Prog(v.Op.Asm()) + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} + p.Reg = v.Args[0].Reg() + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)}) + + // Auxint holds mask + case ssa.OpPPC64RLWNM: + _, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt) + p := s.Prog(v.Op.Asm()) + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} + p.Reg = v.Args[0].Reg() + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)}) + case ssa.OpPPC64MADDLD: r := v.Reg() r1 := v.Args[0].Reg() diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 6175b42b89..558b09c9f2 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -150,6 +150,31 @@ (ROTLW x (MOVDconst [c])) => (ROTLWconst x [c&31]) (ROTL x (MOVDconst [c])) => (ROTLconst x [c&63]) +// Combine rotate and mask operations +(ANDconst [m] (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x) +(AND (MOVDconst [m]) (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x) +(ANDconst [m] (ROTLW x r)) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r) +(AND (MOVDconst [m]) (ROTLW x r)) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r) + +// Note, any rotated word bitmask is still a valid word bitmask. 
+(ROTLWconst [r] (AND (MOVDconst [m]) x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x) +(ROTLWconst [r] (ANDconst [m] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x) + +(ANDconst [m] (SRWconst x [s])) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0]) +(ANDconst [m] (SRWconst x [s])) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x) +(AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0]) +(AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x) + +(SRWconst (ANDconst [m] x) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0]) +(SRWconst (ANDconst [m] x) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x) +(SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0]) +(SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x) + +// Merge shift right + shift left and clear left (e.g for a table lookup) +(CLRLSLDI [c] (SRWconst [s] x)) && mergePPC64ClrlsldiSrw(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x) +(SLDconst [l] (SRWconst [r] x)) && mergePPC64SldiSrw(l,r) != 0 => (RLWINM [mergePPC64SldiSrw(l,r)] x) +// The following reduction shows up frequently too. e.g b[(x>>14)&0xFF] +(CLRLSLDI [c] i:(RLWINM [s] x)) && mergePPC64ClrlsldiRlwinm(c,s) != 0 => (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x) // large constant shifts (Lsh64x64 _ (MOVDconst [c])) && uint64(c) >= 64 => (MOVDconst [0]) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index f4a53262f0..f7198b90c3 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -137,6 +137,7 @@ func init() { gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}} gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}} + gp21a0 = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}} gp31 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}} gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}} gp32 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}} @@ -227,6 +228,10 @@ func init() { {name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits {name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"}, + {name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"}, // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux + {name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"}, // Rotate and mask by "rlwnm". 
encodePPC64RotateMask describes aux + {name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true}, // "rlwimi" similar aux encoding as above + {name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros {name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 779c19f72d..bb1cbc0baa 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1871,6 +1871,9 @@ const ( OpPPC64ROTLconst OpPPC64ROTLWconst OpPPC64EXTSWSLconst + OpPPC64RLWINM + OpPPC64RLWNM + OpPPC64RLWMI OpPPC64CNTLZD OpPPC64CNTLZW OpPPC64CNTTZD @@ -24971,6 +24974,51 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "RLWINM", + auxType: auxInt64, + argLen: 1, + asm: ppc64.ARLWNM, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { + name: "RLWNM", + auxType: auxInt64, + argLen: 2, + asm: ppc64.ARLWNM, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { + name: "RLWMI", + auxType: auxInt64, + argLen: 2, + resultInArg0: true, + asm: ppc64.ARLWMI, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, { name: "CNTLZD", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index e5f858a339..9b3c83d1cf 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1381,6 +1381,71 @@ func GetPPC64Shiftme(auxint int64) int64 { return int64(int8(auxint)) } +// Test if this value can encoded as a mask for a rlwinm like +// operation. Masks can also extend from the msb and wrap to +// the lsb too. That is, the valid masks are 32 bit strings +// of the form: 0..01..10..0 or 1..10..01..1 or 1...1 +func isPPC64WordRotateMask(v64 int64) bool { + // Isolate rightmost 1 (if none 0) and add. + v := uint32(v64) + vp := (v & -v) + v + // Likewise, for the wrapping case. + vn := ^v + vpn := (vn & -vn) + vn + return (v&vp == 0 || vn&vpn == 0) && v != 0 +} + +// Compress mask and and shift into single value of the form +// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can +// be used to regenerate the input mask. 
+func encodePPC64RotateMask(rotate, mask, nbits int64) int64 { + var mb, me, mbn, men int + + // Determine boundaries and then decode them + if mask == 0 || ^mask == 0 || rotate >= nbits { + panic("Invalid PPC64 rotate mask") + } else if nbits == 32 { + mb = bits.LeadingZeros32(uint32(mask)) + me = 32 - bits.TrailingZeros32(uint32(mask)) + mbn = bits.LeadingZeros32(^uint32(mask)) + men = 32 - bits.TrailingZeros32(^uint32(mask)) + } else { + mb = bits.LeadingZeros64(uint64(mask)) + me = 64 - bits.TrailingZeros64(uint64(mask)) + mbn = bits.LeadingZeros64(^uint64(mask)) + men = 64 - bits.TrailingZeros64(^uint64(mask)) + } + // Check for a wrapping mask (e.g bits at 0 and 63) + if mb == 0 && me == int(nbits) { + // swap the inverted values + mb, me = men, mbn + } + + return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24) +} + +// The inverse operation of encodePPC64RotateMask. The values returned as +// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask. +func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) { + auxint := uint64(sauxint) + rotate = int64((auxint >> 16) & 0xFF) + mb = int64((auxint >> 8) & 0xFF) + me = int64((auxint >> 0) & 0xFF) + nbits := int64((auxint >> 24) & 0xFF) + mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1) + if mb > me { + mask = ^mask + } + if nbits == 32 { + mask = uint64(uint32(mask)) + } + + // Fixup ME to match ISA definition. The second argument to MASK(..,me) + // is inclusive. + me = (me - 1) & (nbits - 1) + return +} + // This verifies that the mask occupies the // rightmost bits. func isPPC64ValidShiftMask(v int64) bool { @@ -1394,6 +1459,78 @@ func getPPC64ShiftMaskLength(v int64) int64 { return int64(bits.Len64(uint64(v))) } +// Decompose a shift right into an equivalent rotate/mask, +// and return mask & m. +func mergePPC64RShiftMask(m, s, nbits int64) int64 { + smask := uint64((1<> uint(s) + return m & int64(smask) +} + +// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0 +func mergePPC64AndSrwi(m, s int64) int64 { + mask := mergePPC64RShiftMask(m, s, 32) + if !isPPC64WordRotateMask(mask) { + return 0 + } + return encodePPC64RotateMask(32-s, mask, 32) +} + +// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM. +// Return the encoded RLWINM constant, or 0 if they cannot be merged. +func mergePPC64ClrlsldiSrw(sld, srw int64) int64 { + mask_1 := uint64(0xFFFFFFFF >> uint(srw)) + // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left. + mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld))) + + // Rewrite mask to apply after the final left shift. + mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld)) + + r_1 := 32 - srw + r_2 := GetPPC64Shiftsh(sld) + r_3 := (r_1 + r_2) & 31 // This can wrap. + + if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 { + return 0 + } + return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32) +} + +// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return +// the encoded RLWINM constant, or 0 if they cannot be merged. +func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 { + r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw) + // for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left. + mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld))) + + // combine the masks, and adjust for the final left shift. 
+ mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld))) + r_2 := GetPPC64Shiftsh(int64(sld)) + r_3 := (r_1 + r_2) & 31 // This can wrap. + + // Verify the result is still a valid bitmask of <= 32 bits. + if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 { + return 0 + } + return encodePPC64RotateMask(r_3, int64(mask_3), 32) +} + +// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)), +// or return 0 if they cannot be combined. +func mergePPC64SldiSrw(sld, srw int64) int64 { + if sld > srw || srw >= 32 { + return 0 + } + mask_r := uint32(0xFFFFFFFF) >> uint(srw) + mask_l := uint32(0xFFFFFFFF) >> uint(sld) + mask := (mask_r & mask_l) << uint(sld) + return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32) +} + +// Convenience function to rotate a 32 bit constant value by another constant. +func rotateLeft32(v, rotate int64) int64 { + return int64(bits.RotateLeft32(uint32(v), int(rotate))) +} + // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format. func armBFAuxInt(lsb, width int64) arm64BitField { if lsb < 0 || lsb > 63 { diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 84938fe27a..e5a23e8625 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -444,6 +444,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64ANDN(v) case OpPPC64ANDconst: return rewriteValuePPC64_OpPPC64ANDconst(v) + case OpPPC64CLRLSLDI: + return rewriteValuePPC64_OpPPC64CLRLSLDI(v) case OpPPC64CMP: return rewriteValuePPC64_OpPPC64CMP(v) case OpPPC64CMPU: @@ -598,6 +600,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64ROTL(v) case OpPPC64ROTLW: return rewriteValuePPC64_OpPPC64ROTLW(v) + case OpPPC64ROTLWconst: + return rewriteValuePPC64_OpPPC64ROTLWconst(v) case OpPPC64SLD: return rewriteValuePPC64_OpPPC64SLD(v) case OpPPC64SLDconst: @@ -614,6 +618,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64SRD(v) case OpPPC64SRW: return rewriteValuePPC64_OpPPC64SRW(v) + case OpPPC64SRWconst: + return rewriteValuePPC64_OpPPC64SRWconst(v) case OpPPC64SUB: return rewriteValuePPC64_OpPPC64SUB(v) case OpPPC64SUBFCconst: @@ -4212,6 +4218,100 @@ func rewriteValuePPC64_OpPPC64ADDconst(v *Value) bool { func rewriteValuePPC64_OpPPC64AND(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] + // match: (AND (MOVDconst [m]) (ROTLWconst [r] x)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWINM [encodePPC64RotateMask(r,m,32)] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpPPC64ROTLWconst { + continue + } + r := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] + if !(isPPC64WordRotateMask(m)) { + continue + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, m, 32)) + v.AddArg(x) + return true + } + break + } + // match: (AND (MOVDconst [m]) (ROTLW x r)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWNM [encodePPC64RotateMask(0,m,32)] x r) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpPPC64ROTLW { + continue + } + r := v_1.Args[1] + x := v_1.Args[0] + if !(isPPC64WordRotateMask(m)) { + continue + } + v.reset(OpPPC64RLWNM) + v.AuxInt = 
int64ToAuxInt(encodePPC64RotateMask(0, m, 32)) + v.AddArg2(x, r) + return true + } + break + } + // match: (AND (MOVDconst [m]) (SRWconst x [s])) + // cond: mergePPC64RShiftMask(m,s,32) == 0 + // result: (MOVDconst [0]) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpPPC64SRWconst { + continue + } + s := auxIntToInt64(v_1.AuxInt) + if !(mergePPC64RShiftMask(m, s, 32) == 0) { + continue + } + v.reset(OpPPC64MOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + break + } + // match: (AND (MOVDconst [m]) (SRWconst x [s])) + // cond: mergePPC64AndSrwi(m,s) != 0 + // result: (RLWINM [mergePPC64AndSrwi(m,s)] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpPPC64SRWconst { + continue + } + s := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] + if !(mergePPC64AndSrwi(m, s) != 0) { + continue + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m, s)) + v.AddArg(x) + return true + } + break + } // match: (AND x (NOR y y)) // result: (ANDN x y) for { @@ -4347,6 +4447,76 @@ func rewriteValuePPC64_OpPPC64ANDN(v *Value) bool { } func rewriteValuePPC64_OpPPC64ANDconst(v *Value) bool { v_0 := v.Args[0] + // match: (ANDconst [m] (ROTLWconst [r] x)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWINM [encodePPC64RotateMask(r,m,32)] x) + for { + m := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64ROTLWconst { + break + } + r := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(isPPC64WordRotateMask(m)) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, m, 32)) + v.AddArg(x) + return true + } + // match: (ANDconst [m] (ROTLW x r)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWNM [encodePPC64RotateMask(0,m,32)] x r) + for { + m := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64ROTLW { + break + } + r := v_0.Args[1] + x := v_0.Args[0] + if !(isPPC64WordRotateMask(m)) { + break + } + v.reset(OpPPC64RLWNM) + v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 32)) + v.AddArg2(x, r) + return true + } + // match: (ANDconst [m] (SRWconst x [s])) + // cond: mergePPC64RShiftMask(m,s,32) == 0 + // result: (MOVDconst [0]) + for { + m := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64SRWconst { + break + } + s := auxIntToInt64(v_0.AuxInt) + if !(mergePPC64RShiftMask(m, s, 32) == 0) { + break + } + v.reset(OpPPC64MOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + // match: (ANDconst [m] (SRWconst x [s])) + // cond: mergePPC64AndSrwi(m,s) != 0 + // result: (RLWINM [mergePPC64AndSrwi(m,s)] x) + for { + m := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64SRWconst { + break + } + s := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(mergePPC64AndSrwi(m, s) != 0) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m, s)) + v.AddArg(x) + return true + } // match: (ANDconst [c] (ANDconst [d] x)) // result: (ANDconst [c&d] x) for { @@ -4511,6 +4681,47 @@ func rewriteValuePPC64_OpPPC64ANDconst(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64CLRLSLDI(v *Value) bool { + v_0 := v.Args[0] + // match: (CLRLSLDI [c] (SRWconst [s] x)) + // cond: mergePPC64ClrlsldiSrw(int64(c),s) != 0 + // result: (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x) + for { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpPPC64SRWconst { + break + } + s := auxIntToInt64(v_0.AuxInt) + x := 
v_0.Args[0] + if !(mergePPC64ClrlsldiSrw(int64(c), s) != 0) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64ClrlsldiSrw(int64(c), s)) + v.AddArg(x) + return true + } + // match: (CLRLSLDI [c] i:(RLWINM [s] x)) + // cond: mergePPC64ClrlsldiRlwinm(c,s) != 0 + // result: (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x) + for { + c := auxIntToInt32(v.AuxInt) + i := v_0 + if i.Op != OpPPC64RLWINM { + break + } + s := auxIntToInt64(i.AuxInt) + x := i.Args[0] + if !(mergePPC64ClrlsldiRlwinm(c, s) != 0) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64ClrlsldiRlwinm(c, s)) + v.AddArg(x) + return true + } + return false +} func rewriteValuePPC64_OpPPC64CMP(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -12850,6 +13061,55 @@ func rewriteValuePPC64_OpPPC64ROTLW(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64ROTLWconst(v *Value) bool { + v_0 := v.Args[0] + // match: (ROTLWconst [r] (AND (MOVDconst [m]) x)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x) + for { + r := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64AND { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0_0.AuxInt) + x := v_0_1 + if !(isPPC64WordRotateMask(m)) { + continue + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, rotateLeft32(m, r), 32)) + v.AddArg(x) + return true + } + break + } + // match: (ROTLWconst [r] (ANDconst [m] x)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x) + for { + r := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64ANDconst { + break + } + m := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(isPPC64WordRotateMask(m)) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, rotateLeft32(m, r), 32)) + v.AddArg(x) + return true + } + return false +} func rewriteValuePPC64_OpPPC64SLD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -12870,6 +13130,24 @@ func rewriteValuePPC64_OpPPC64SLD(v *Value) bool { } func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { v_0 := v.Args[0] + // match: (SLDconst [l] (SRWconst [r] x)) + // cond: mergePPC64SldiSrw(l,r) != 0 + // result: (RLWINM [mergePPC64SldiSrw(l,r)] x) + for { + l := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64SRWconst { + break + } + r := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(mergePPC64SldiSrw(l, r) != 0) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64SldiSrw(l, r)) + v.AddArg(x) + return true + } // match: (SLDconst [c] z:(MOVBZreg x)) // cond: c < 8 && z.Uses == 1 // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,56,63,64)] x) @@ -13186,6 +13464,96 @@ func rewriteValuePPC64_OpPPC64SRW(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64SRWconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SRWconst (ANDconst [m] x) [s]) + // cond: mergePPC64RShiftMask(m>>uint(s),s,32) == 0 + // result: (MOVDconst [0]) + for { + s := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64ANDconst { + break + } + m := auxIntToInt64(v_0.AuxInt) + if !(mergePPC64RShiftMask(m>>uint(s), s, 32) == 0) { + break + } + v.reset(OpPPC64MOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + // match: (SRWconst (ANDconst [m] x) [s]) + // cond: mergePPC64AndSrwi(m>>uint(s),s) != 0 + // result: (RLWINM 
[mergePPC64AndSrwi(m>>uint(s),s)] x) + for { + s := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64ANDconst { + break + } + m := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(mergePPC64AndSrwi(m>>uint(s), s) != 0) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m>>uint(s), s)) + v.AddArg(x) + return true + } + // match: (SRWconst (AND (MOVDconst [m]) x) [s]) + // cond: mergePPC64RShiftMask(m>>uint(s),s,32) == 0 + // result: (MOVDconst [0]) + for { + s := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64AND { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0_0.AuxInt) + if !(mergePPC64RShiftMask(m>>uint(s), s, 32) == 0) { + continue + } + v.reset(OpPPC64MOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + break + } + // match: (SRWconst (AND (MOVDconst [m]) x) [s]) + // cond: mergePPC64AndSrwi(m>>uint(s),s) != 0 + // result: (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x) + for { + s := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64AND { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0_0.AuxInt) + x := v_0_1 + if !(mergePPC64AndSrwi(m>>uint(s), s) != 0) { + continue + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m>>uint(s), s)) + v.AddArg(x) + return true + } + break + } + return false +} func rewriteValuePPC64_OpPPC64SUB(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] diff --git a/src/cmd/compile/internal/ssa/rewrite_test.go b/src/cmd/compile/internal/ssa/rewrite_test.go index 1a15d8c940..6fe429e85a 100644 --- a/src/cmd/compile/internal/ssa/rewrite_test.go +++ b/src/cmd/compile/internal/ssa/rewrite_test.go @@ -36,3 +36,184 @@ func TestSubFlags(t *testing.T) { t.Errorf("subFlags32(0,1).ult() returned false") } } + +func TestIsPPC64WordRotateMask(t *testing.T) { + tests := []struct { + input int64 + expected bool + }{ + {0x00000001, true}, + {0x80000001, true}, + {0x80010001, false}, + {0xFFFFFFFA, false}, + {0xF0F0F0F0, false}, + {0xFFFFFFFD, true}, + {0x80000000, true}, + {0x00000000, false}, + {0xFFFFFFFF, true}, + {0x0000FFFF, true}, + {0xFF0000FF, true}, + {0x00FFFF00, true}, + } + + for _, v := range tests { + if v.expected != isPPC64WordRotateMask(v.input) { + t.Errorf("isPPC64WordRotateMask(0x%x) failed", v.input) + } + } +} + +func TestEncodeDecodePPC64WordRotateMask(t *testing.T) { + tests := []struct { + rotate int64 + mask uint64 + nbits, + mb, + me, + encoded int64 + }{ + {1, 0x00000001, 32, 31, 31, 0x20011f20}, + {2, 0x80000001, 32, 31, 0, 0x20021f01}, + {3, 0xFFFFFFFD, 32, 31, 29, 0x20031f1e}, + {4, 0x80000000, 32, 0, 0, 0x20040001}, + {5, 0xFFFFFFFF, 32, 0, 31, 0x20050020}, + {6, 0x0000FFFF, 32, 16, 31, 0x20061020}, + {7, 0xFF0000FF, 32, 24, 7, 0x20071808}, + {8, 0x00FFFF00, 32, 8, 23, 0x20080818}, + + {9, 0x0000000000FFFF00, 64, 40, 55, 0x40092838}, + {10, 0xFFFF000000000000, 64, 0, 15, 0x400A0010}, + {10, 0xFFFF000000000001, 64, 63, 15, 0x400A3f10}, + } + + for i, v := range tests { + result := encodePPC64RotateMask(v.rotate, int64(v.mask), v.nbits) + if result != v.encoded { + t.Errorf("encodePPC64RotateMask(%d,0x%x,%d) = 0x%x, expected 0x%x", v.rotate, v.mask, v.nbits, result, v.encoded) + } + rotate, mb, me, mask := DecodePPC64RotateMask(result) + if rotate != 
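The truth table above fully determines isPPC64WordRotateMask: a valid mask is a single contiguous run of ones, where the run may wrap around the 32-bit boundary (0xFF0000FF) but may not be split (0x80010001). A self-contained sketch that agrees with every vector in the table — isRun and isWordRotateMask are illustrative names, not the compiler's:

package main

import (
	"fmt"
	"math/bits"
)

// isRun reports whether x is a single contiguous, non-wrapping run of ones.
func isRun(x uint32) bool {
	if x == 0 {
		return false
	}
	x >>= uint(bits.TrailingZeros32(x))
	return x&(x+1) == 0
}

// isWordRotateMask: the run may wrap around bit 31, so either the mask
// itself or its complement must be one contiguous run.
func isWordRotateMask(m uint32) bool {
	return m != 0 && (isRun(m) || isRun(^m))
}

func main() {
	for _, m := range []uint32{0xFF0000FF, 0xFFFFFFFF, 0x80010001, 0xF0F0F0F0} {
		fmt.Printf("%#08x %v\n", m, isWordRotateMask(m)) // true true false false
	}
}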
v.rotate || mb != v.mb || me != v.me || mask != v.mask { + t.Errorf("DecodePPC64Failure(Test %d) got (%d, %d, %d, %x) expected (%d, %d, %d, %x)", i, rotate, mb, me, mask, v.rotate, v.mb, v.me, v.mask) + } + } +} + +func TestMergePPC64ClrlsldiSrw(t *testing.T) { + tests := []struct { + clrlsldi int32 + srw int64 + valid bool + rotate int64 + mask uint64 + }{ + // ((x>>4)&0xFF)<<4 + {newPPC64ShiftAuxInt(4, 56, 63, 64), 4, true, 0, 0xFF0}, + // ((x>>4)&0xFFFF)<<4 + {newPPC64ShiftAuxInt(4, 48, 63, 64), 4, true, 0, 0xFFFF0}, + // ((x>>4)&0xFFFF)<<17 + {newPPC64ShiftAuxInt(17, 48, 63, 64), 4, false, 0, 0}, + // ((x>>4)&0xFFFF)<<16 + {newPPC64ShiftAuxInt(16, 48, 63, 64), 4, true, 12, 0xFFFF0000}, + // ((x>>32)&0xFFFF)<<17 + {newPPC64ShiftAuxInt(17, 48, 63, 64), 32, false, 0, 0}, + } + for i, v := range tests { + result := mergePPC64ClrlsldiSrw(int64(v.clrlsldi), v.srw) + if v.valid && result == 0 { + t.Errorf("mergePPC64ClrlsldiSrw(Test %d) did not merge", i) + } else if !v.valid && result != 0 { + t.Errorf("mergePPC64ClrlsldiSrw(Test %d) should return 0", i) + } else if r, _, _, m := DecodePPC64RotateMask(result); v.rotate != r || v.mask != m { + t.Errorf("mergePPC64ClrlsldiSrw(Test %d) got (%d,0x%x) expected (%d,0x%x)", i, r, m, v.rotate, v.mask) + } + } +} + +func TestMergePPC64ClrlsldiRlwinm(t *testing.T) { + tests := []struct { + clrlsldi int32 + rlwinm int64 + valid bool + rotate int64 + mask uint64 + }{ + // ((x<<4)&0xFF00)<<4 + {newPPC64ShiftAuxInt(4, 56, 63, 64), encodePPC64RotateMask(4, 0xFF00, 32), false, 0, 0}, + // ((x>>4)&0xFF)<<4 + {newPPC64ShiftAuxInt(4, 56, 63, 64), encodePPC64RotateMask(28, 0x0FFFFFFF, 32), true, 0, 0xFF0}, + // ((x>>4)&0xFFFF)<<4 + {newPPC64ShiftAuxInt(4, 48, 63, 64), encodePPC64RotateMask(28, 0xFFFF, 32), true, 0, 0xFFFF0}, + // ((x>>4)&0xFFFF)<<17 + {newPPC64ShiftAuxInt(17, 48, 63, 64), encodePPC64RotateMask(28, 0xFFFF, 32), false, 0, 0}, + // ((x>>4)&0xFFFF)<<16 + {newPPC64ShiftAuxInt(16, 48, 63, 64), encodePPC64RotateMask(28, 0xFFFF, 32), true, 12, 0xFFFF0000}, + // ((x>>4)&0xF000FFFF)<<16 + {newPPC64ShiftAuxInt(16, 48, 63, 64), encodePPC64RotateMask(28, 0xF000FFFF, 32), true, 12, 0xFFFF0000}, + } + for i, v := range tests { + result := mergePPC64ClrlsldiRlwinm(v.clrlsldi, v.rlwinm) + if v.valid && result == 0 { + t.Errorf("mergePPC64ClrlsldiRlwinm(Test %d) did not merge", i) + } else if !v.valid && result != 0 { + t.Errorf("mergePPC64ClrlsldiRlwinm(Test %d) should return 0", i) + } else if r, _, _, m := DecodePPC64RotateMask(result); v.rotate != r || v.mask != m { + t.Errorf("mergePPC64ClrlsldiRlwinm(Test %d) got (%d,0x%x) expected (%d,0x%x)", i, r, m, v.rotate, v.mask) + } + } +} + +func TestMergePPC64SldiSrw(t *testing.T) { + tests := []struct { + sld int64 + srw int64 + valid bool + rotate int64 + mask uint64 + }{ + {4, 4, true, 0, 0xFFFFFFF0}, + {4, 8, true, 28, 0x0FFFFFF0}, + {0, 0, true, 0, 0xFFFFFFFF}, + {8, 4, false, 0, 0}, + {0, 32, false, 0, 0}, + {0, 31, true, 1, 0x1}, + {31, 31, true, 0, 0x80000000}, + {32, 32, false, 0, 0}, + } + for i, v := range tests { + result := mergePPC64SldiSrw(v.sld, v.srw) + if v.valid && result == 0 { + t.Errorf("mergePPC64SldiSrw(Test %d) did not merge", i) + } else if !v.valid && result != 0 { + t.Errorf("mergePPC64SldiSrw(Test %d) should return 0", i) + } else if r, _, _, m := DecodePPC64RotateMask(result); v.rotate != r || v.mask != m { + t.Errorf("mergePPC64SldiSrw(Test %d) got (%d,0x%x) expected (%d,0x%x)", i, r, m, v.rotate, v.mask) + } + } +} + +func TestMergePPC64AndSrwi(t *testing.T) { + tests := []struct { 
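The encode/decode vectors above also pin down the auxint layout: one byte each for nbits, rotate, mask-begin, and mask-end-plus-one, most significant byte first. The decoder below is a sketch consistent with those vectors — the layout is inferred from the table, not quoted from the compiler source, and the decodeRotateMask name is ours. Note that it leans on Go's defined behavior that shifting a variable count wider than the type yields zero:

package main

import "fmt"

// decodeRotateMask unpacks the auxint layout implied by the vectors
// above: |nbits|rotate|mb|me+1|, one byte each, most significant first.
func decodeRotateMask(aux int64) (rotate, mb, me int64, mask uint64) {
	u := uint64(aux)
	nbits := int64(u>>24) & 0xFF
	rotate = int64(u>>16) & 0xFF
	mb = int64(u>>8) & 0xFF
	me = int64(u) & 0xFF
	// Build the mask between mb and me; an over-wide variable shift is
	// defined to be zero in Go, which makes the nbits==64, mb==0 case work.
	mask = (uint64(1)<<uint(nbits-mb) - 1) ^ (uint64(1)<<uint(nbits-me) - 1)
	if mb > me {
		mask = ^mask // wrapping mask such as 0x80000001
	}
	if nbits == 32 {
		mask = uint64(uint32(mask))
	}
	me = (me - 1) & (nbits - 1) // stored as end+1
	return
}

func main() {
	rotate, mb, me, mask := decodeRotateMask(0x20021f01)
	fmt.Printf("%d %d %d %#x\n", rotate, mb, me, mask) // 2 31 0 0x80000001
}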
+ and int64 + srw int64 + valid bool + rotate int64 + mask uint64 + }{ + {0x000000FF, 8, true, 24, 0xFF}, + {0xF00000FF, 8, true, 24, 0xFF}, + {0x0F0000FF, 4, false, 0, 0}, + {0x00000000, 4, false, 0, 0}, + {0xF0000000, 4, false, 0, 0}, + {0xF0000000, 32, false, 0, 0}, + } + for i, v := range tests { + result := mergePPC64AndSrwi(v.and, v.srw) + if v.valid && result == 0 { + t.Errorf("mergePPC64AndSrwi(Test %d) did not merge", i) + } else if !v.valid && result != 0 { + t.Errorf("mergePPC64AndSrwi(Test %d) should return 0", i) + } else if r, _, _, m := DecodePPC64RotateMask(result); v.rotate != r || v.mask != m { + t.Errorf("mergePPC64AndSrwi(Test %d) got (%d,0x%x) expected (%d,0x%x)", i, r, m, v.rotate, v.mask) + } + } +} diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go index ce24b57877..0c8b030970 100644 --- a/test/codegen/rotate.go +++ b/test/codegen/rotate.go @@ -6,6 +6,8 @@ package codegen +import "math/bits" + // ------------------- // // const rotates // // ------------------- // @@ -166,3 +168,46 @@ func f32(x uint32) uint32 { // amd64:"ROLL\t[$]7" return rot32nc(x, 7) } + +// --------------------------------------- // +// Combined Rotate + Masking operations // +// --------------------------------------- // + +func checkMaskedRotate32(a []uint32, r int) { + i := 0 + + // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+" + // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], 16) & 0xFF0000 + i++ + // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+" + // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+" + a[i] = bits.RotateLeft32(a[i]&0xFF, 16) + i++ + // ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]4080, R[0-9]+" + // ppc64: "RLWNM\t[$]4, R[0-9]+, [$]4080, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], 4) & 0xFF0 + i++ + // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]255, R[0-9]+" + // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]255, R[0-9]+" + a[i] = bits.RotateLeft32(a[i]&0xFF0000, 16) + i++ + + // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]16711680, R[0-9]+" + // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]16711680, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], r) & 0xFF0000 + i++ + // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]65280, R[0-9]+" + // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]65280, R[0-9]+" + a[i] = bits.RotateLeft32(a[3], r) & 0xFF00 + i++ + + // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]4293922815, R[0-9]+" + // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]4293922815, R[0-9]+" + a[i] = bits.RotateLeft32(a[3], r) & 0xFFF00FFF + i++ + // ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]4293922815, R[0-9]+" + // ppc64: "RLWNM\t[$]4, R[0-9]+, [$]4293922815, R[0-9]+" + a[i] = bits.RotateLeft32(a[3], 4) & 0xFFF00FFF + i++ +} diff --git a/test/codegen/shift.go b/test/codegen/shift.go index bbfc85ffbb..a45f27c9cf 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -156,29 +156,29 @@ func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byt // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f := tab[byte(v)^b] // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" - // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f += tab[byte(v)&b] // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" - // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f += tab[byte(v)|b] // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" - // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f += tab[uint16(v)&h] // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" - // 
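The merge being tested here rests on a simple identity: a logical right shift is a rotate whose wrapped-around bits are masked off, so shift-then-AND collapses into rotate-then-AND with one combined mask. A quick property check in plain Go, independent of any compiler internals:

package main

import (
	"fmt"
	"math/bits"
	"math/rand"
)

func main() {
	for i := 0; i < 100000; i++ {
		x, m := rand.Uint32(), rand.Uint32()
		s := uint(1 + rand.Intn(31))
		// A right shift is a rotate with the wrapped-in bits cleared,
		// so (x>>s)&m is one rotate plus one combined mask.
		want := x >> s & m
		got := bits.RotateLeft32(x, -int(s)) & (^uint32(0) >> s) & m
		if want != got {
			fmt.Printf("counterexample: x=%#x s=%d m=%#x\n", x, s, m)
			return
		}
	}
	fmt.Println("shift+mask == rotate+mask")
}

The rewrite is only emitted when the combined mask is itself a valid (possibly wrapping) rotate mask, which is what a zero return from mergePPC64AndSrwi signals in the tests above.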
ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f += tab[uint16(v)^h] // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" - // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f += tab[uint16(v)|h] // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" f += tab[v&0xff] // ppc64le:-".*AND",".*CLRLSLWI" - // ppc64:-".*AND",".*CLRLSLWI" - f += 2*uint32(uint16(d)) + // ppc64:-".*AND",".*CLRLSLWI" + f += 2 * uint32(uint16(d)) // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" - g := 2*uint64(uint32(d)) + g := 2 * uint64(uint32(d)) return f, g } @@ -186,10 +186,10 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" - f := (v8 &0xF) << 2 + f := (v8 & 0xF) << 2 // ppc64le:"CLRLSLWI" // ppc64:"CLRLSLWI" - f += byte(v16)<<3 + f += byte(v16) << 3 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" g := (v16 & 0xFF) << 3 @@ -207,29 +207,81 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64 i += (v64 & 0xFFFF00) << 10 // ppc64le/power9:-"SLD","EXTSWSLI" // ppc64/power9:-"SLD","EXTSWSLI" - j := int64(x32+32)*8 + j := int64(x32+32) * 8 return f, g, h, i, j } func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { // ppc64le:-".*MOVW" - f := int32(v>>32) + f := int32(v >> 32) // ppc64le:".*MOVW" - f += int32(v>>31) + f += int32(v >> 31) // ppc64le:-".*MOVH" - g := int16(v>>48) + g := int16(v >> 48) // ppc64le:".*MOVH" - g += int16(v>>30) + g += int16(v >> 30) // ppc64le:-".*MOVH" - g += int16(f>>16) + g += int16(f >> 16) // ppc64le:-".*MOVB" - h := int8(v>>56) + h := int8(v >> 56) // ppc64le:".*MOVB" - h += int8(v>>28) + h += int8(v >> 28) // ppc64le:-".*MOVB" - h += int8(f>>24) + h += int8(f >> 24) // ppc64le:".*MOVB" - h += int8(f>>16) - return int64(h),uint64(g) + h += int8(f >> 16) + return int64(h), uint64(g) +} + +func checkShiftAndMask32(v []uint32) { + i := 0 + + // ppc64le: "RLWNM\t[$]24, R[0-9]+, [$]1044480, R[0-9]+" + // ppc64: "RLWNM\t[$]24, R[0-9]+, [$]1044480, R[0-9]+" + v[i] = (v[i] & 0xFF00000) >> 8 + i++ + // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]1020, R[0-9]+" + // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]1020, R[0-9]+" + v[i] = (v[i] & 0xFF00) >> 6 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] & 0xFF) >> 8 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] & 0xF000000) >> 28 + i++ + // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]255, R[0-9]+" + // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]255, R[0-9]+" + v[i] = (v[i] >> 6) & 0xFF + i++ + // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]1044480, R[0-9]+" + // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]1044480, R[0-9]+" + v[i] = (v[i] >> 6) & 0xFF000 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] >> 20) & 0xFF000 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] >> 24) & 0xFF00 + i++ +} + +func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) { + //ppc64le: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]1020, R[0-9]+" + //ppc64: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]1020, R[0-9]+" + a[0] = a[uint8(v>>24)] + //ppc64le: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]2040, R[0-9]+" + //ppc64: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]2040, R[0-9]+" + b[0] = b[uint8(v>>24)] + //ppc64le: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]2040, R[0-9]+" + //ppc64: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]2040, R[0-9]+" + b[1] = 
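The RLWNM masks asserted in these codegen checks are worth decoding: for b[uint8(v>>24)] the element size is 8 bytes, so the byte offset is ((v>>24)&0xFF)*8 — a net rotate of (32-24+3)&31 = 11 with mask 0xFF<<3 = 2040, exactly the operands in the check. A small verification, assuming nothing beyond math/bits:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	v := uint32(0xAABBCCDD)
	// Byte offset of b[uint8(v>>24)] with 8-byte elements:
	want := uint32(uint8(v>>24)) * 8
	// As one rotate-and-mask: net rotation (32-24+3)&31 = 11,
	// mask 0xFF<<3 = 2040 -- the RLWNM operands asserted above.
	got := bits.RotateLeft32(v, 11) & (0xFF << 3)
	fmt.Println(want == got, got) // true 1360
}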
b[(v>>20)&0xFF] + //ppc64le: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]1016, R[0-9]+" + //ppc64: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]1016, R[0-9]+" + b[2] = b[v>>25] } -- cgit v1.3 From 854e892ce17e2555c59fce5b92f64bc505ba5d8c Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Mon, 11 May 2020 09:44:48 -0700 Subject: cmd/compile: optimize shift pairs and masks on s390x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimize combinations of left and right shifts by a constant value into a 'rotate then insert selected bits [into zero]' instruction. Use the same instruction for contiguous masks since it has some benefits over 'and immediate' (not restricted to 32-bits, does not overwrite source register). To keep the complexity of this change under control I've only implemented 64 bit operations for now. There are a lot more optimizations that can be done with this instruction family. However, since their function overlaps with other instructions we need to be somewhat careful not to break existing optimization rules by creating optimization dead ends. This is particularly true of the load/store merging rules which contain lots of zero extensions and shifts. This CL does interfere with the store merging rules when an operand is shifted left before it is stored: binary.BigEndian.PutUint64(b, x << 1) This is unfortunate but it's not critical and somewhat complex so I plan to fix that in a follow up CL. file before after Δ % addr2line 4117446 4117282 -164 -0.004% api 4945184 4942752 -2432 -0.049% asm 4998079 4991891 -6188 -0.124% buildid 2685158 2684074 -1084 -0.040% cgo 4553732 4553394 -338 -0.007% compile 19294446 19245070 -49376 -0.256% cover 4897105 4891319 -5786 -0.118% dist 3544389 3542785 -1604 -0.045% doc 3926795 3927617 +822 +0.021% fix 3302958 3293868 -9090 -0.275% link 6546274 6543456 -2818 -0.043% nm 4102021 4100825 -1196 -0.029% objdump 4542431 4548483 +6052 +0.133% pack 2482465 2416389 -66076 -2.662% pprof 13366541 13363915 -2626 -0.020% test2json 2829007 2761515 -67492 -2.386% trace 10216164 10219684 +3520 +0.034% vet 6773956 6773572 -384 -0.006% total 107124151 106917891 -206260 -0.193% Change-Id: I7591cce41e06867ba10a745daae9333513062746 Reviewed-on: https://go-review.googlesource.com/c/go/+/233317 Run-TryBot: Michael Munday TryBot-Result: Go Bot Reviewed-by: Keith Randall Trust: Michael Munday --- src/cmd/compile/internal/s390x/ssa.go | 14 +- src/cmd/compile/internal/ssa/gen/S390X.rules | 162 ++++- src/cmd/compile/internal/ssa/gen/S390XOps.go | 20 +- src/cmd/compile/internal/ssa/opGen.go | 25 +- src/cmd/compile/internal/ssa/rewriteS390X.go | 844 +++++++++++++++++++-------- src/cmd/internal/obj/s390x/rotate.go | 82 ++- src/cmd/internal/obj/s390x/rotate_test.go | 122 ++++ test/codegen/bitfield.go | 18 +- test/codegen/bits.go | 12 + test/codegen/mathbits.go | 2 +- test/codegen/rotate.go | 6 +- test/codegen/shift.go | 33 +- 12 files changed, 1007 insertions(+), 333 deletions(-) create mode 100644 src/cmd/internal/obj/s390x/rotate_test.go (limited to 'test/codegen/shift.go') diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go index 84b9f491e4..8037357131 100644 --- a/src/cmd/compile/internal/s390x/ssa.go +++ b/src/cmd/compile/internal/s390x/ssa.go @@ -188,6 +188,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { {Type: obj.TYPE_REG, Reg: r2}, }) p.To = obj.Addr{Type: obj.TYPE_REG, Reg: r1} + case ssa.OpS390XRISBGZ: + r1 := v.Reg() + r2 := v.Args[0].Reg() + i := v.Aux.(s390x.RotateParams) + p := 
s.Prog(v.Op.Asm()) + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(i.Start)} + p.SetRestArgs([]obj.Addr{ + {Type: obj.TYPE_CONST, Offset: int64(i.End)}, + {Type: obj.TYPE_CONST, Offset: int64(i.Amount)}, + {Type: obj.TYPE_REG, Reg: r2}, + }) + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: r1} case ssa.OpS390XADD, ssa.OpS390XADDW, ssa.OpS390XSUB, ssa.OpS390XSUBW, ssa.OpS390XAND, ssa.OpS390XANDW, @@ -360,7 +372,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { case ssa.OpS390XSLDconst, ssa.OpS390XSLWconst, ssa.OpS390XSRDconst, ssa.OpS390XSRWconst, ssa.OpS390XSRADconst, ssa.OpS390XSRAWconst, - ssa.OpS390XRLLGconst, ssa.OpS390XRLLconst: + ssa.OpS390XRLLconst: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules index 1b56361c00..39949edbc2 100644 --- a/src/cmd/compile/internal/ssa/gen/S390X.rules +++ b/src/cmd/compile/internal/ssa/gen/S390X.rules @@ -643,8 +643,18 @@ // equivalent to the leftmost 32 bits being set. // TODO(mundaym): modify the assembler to accept 64-bit values // and use isU32Bit(^c). -(AND x (MOVDconst [c])) && is32Bit(c) && c < 0 => (ANDconst [c] x) -(AND x (MOVDconst [c])) && is32Bit(c) && c >= 0 => (MOVWZreg (ANDWconst [int32(c)] x)) +(AND x (MOVDconst [c])) + && s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c)) != nil + => (RISBGZ x {*s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c))}) +(AND x (MOVDconst [c])) + && is32Bit(c) + && c < 0 + => (ANDconst [c] x) +(AND x (MOVDconst [c])) + && is32Bit(c) + && c >= 0 + => (MOVWZreg (ANDWconst [int32(c)] x)) + (ANDW x (MOVDconst [c])) => (ANDWconst [int32(c)] x) ((AND|ANDW)const [c] ((AND|ANDW)const [d] x)) => ((AND|ANDW)const [c&d] x) @@ -653,14 +663,20 @@ ((OR|XOR)W x (MOVDconst [c])) => ((OR|XOR)Wconst [int32(c)] x) // Constant shifts. -(S(LD|RD|RAD|LW|RW|RAW) x (MOVDconst [c])) - => (S(LD|RD|RAD|LW|RW|RAW)const x [int8(c&63)]) +(S(LD|RD|RAD) x (MOVDconst [c])) => (S(LD|RD|RAD)const x [int8(c&63)]) +(S(LW|RW|RAW) x (MOVDconst [c])) && c&32 == 0 => (S(LW|RW|RAW)const x [int8(c&31)]) +(S(LW|RW) _ (MOVDconst [c])) && c&32 != 0 => (MOVDconst [0]) +(SRAW x (MOVDconst [c])) && c&32 != 0 => (SRAWconst x [31]) // Shifts only use the rightmost 6 bits of the shift value. +(S(LD|RD|RAD|LW|RW|RAW) x (RISBGZ y {r})) + && r.Amount == 0 + && r.OutMask()&63 == 63 + => (S(LD|RD|RAD|LW|RW|RAW) x y) (S(LD|RD|RAD|LW|RW|RAW) x (AND (MOVDconst [c]) y)) - => (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [int32(c&63)] y)) + => (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [int32(c&63)] y)) (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [c] y)) && c&63 == 63 - => (S(LD|RD|RAD|LW|RW|RAW) x y) + => (S(LD|RD|RAD|LW|RW|RAW) x y) (SLD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SLD x y) (SRD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRD x y) (SRAD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRAD x y) @@ -668,17 +684,13 @@ (SRW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRW x y) (SRAW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRAW x y) -// Constant rotate generation -(RLL x (MOVDconst [c])) => (RLLconst x [int8(c&31)]) -(RLLG x (MOVDconst [c])) => (RLLGconst x [int8(c&63)]) - -(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c => (RLLGconst [c] x) -( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c => (RLLGconst [c] x) -(XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c => (RLLGconst [c] x) +// Match rotate by constant. 
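Before the rotate rules, it helps to have a working model of RISBGZ ("rotate then insert selected bits into zero"): rotate left by Amount, then keep only bits Start..End in big-endian numbering (bit 0 is the MSB; Start > End selects a wrapping range). The risbgz helper below is an illustrative model under that reading — only its three parameters mirror s390x.RotateParams, nothing else is compiler code:

package main

import (
	"fmt"
	"math/bits"
)

// risbgz models the instruction: rotate x left by amount, then clear
// every bit outside start..end (bit 0 = MSB; start > end wraps).
func risbgz(x uint64, start, end, amount uint) uint64 {
	var m uint64
	if start <= end {
		m = ^uint64(0)>>start & ^uint64(0)<<(63-end)
	} else {
		m = ^uint64(0)>>start | ^uint64(0)<<(63-end)
	}
	return bits.RotateLeft64(x, int(amount)) & m
}

func main() {
	x := uint64(0xDEADBEEF12345678)
	fmt.Printf("%#x\n", risbgz(x, 0, 63, 13)) // pure rotate: the mask is all ones
	fmt.Printf("%#x\n", risbgz(x, 56, 63, 0)) // 0x78: byte zero-extension (MOVBZreg)
	fmt.Printf("%#x\n", risbgz(x, 1, 63, 0))  // clears only the sign bit
}

This is why a rotate by constant, the zero extensions, and the sign-bit tricks later in the file are all expressible as this one instruction.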
+(RLLG x (MOVDconst [c])) => (RISBGZ x {s390x.NewRotateParams(0, 63, int8(c&63))}) +(RLL x (MOVDconst [c])) => (RLLconst x [int8(c&31)]) -(ADDW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c => (RLLconst [c] x) -( ORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c => (RLLconst [c] x) -(XORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c => (RLLconst [c] x) +// Match rotate by constant pattern. +((ADD|OR|XOR) (SLDconst x [c]) (SRDconst x [64-c])) => (RISBGZ x {s390x.NewRotateParams(0, 63, c)}) +((ADD|OR|XOR)W (SLWconst x [c]) (SRWconst x [32-c])) => (RLLconst x [c]) // Signed 64-bit comparison with immediate. (CMP x (MOVDconst [c])) && is32Bit(c) => (CMPconst x [int32(c)]) @@ -692,15 +704,97 @@ (CMP(W|WU) x (MOVDconst [c])) => (CMP(W|WU)const x [int32(c)]) (CMP(W|WU) (MOVDconst [c]) x) => (InvertFlags (CMP(W|WU)const x [int32(c)])) +// Match (x >> c) << d to 'rotate then insert selected bits [into zero]'. +(SLDconst (SRDconst x [c]) [d]) => (RISBGZ x {s390x.NewRotateParams(max8(0, c-d), 63-d, (d-c)&63)}) + +// Match (x << c) >> d to 'rotate then insert selected bits [into zero]'. +(SRDconst (SLDconst x [c]) [d]) => (RISBGZ x {s390x.NewRotateParams(d, min8(63, 63-c+d), (c-d)&63)}) + +// Absorb input zero extension into 'rotate then insert selected bits [into zero]'. +(RISBGZ (MOVWZreg x) {r}) && r.InMerge(0xffffffff) != nil => (RISBGZ x {*r.InMerge(0xffffffff)}) +(RISBGZ (MOVHZreg x) {r}) && r.InMerge(0x0000ffff) != nil => (RISBGZ x {*r.InMerge(0x0000ffff)}) +(RISBGZ (MOVBZreg x) {r}) && r.InMerge(0x000000ff) != nil => (RISBGZ x {*r.InMerge(0x000000ff)}) + +// Absorb 'rotate then insert selected bits [into zero]' into zero extension. +(MOVWZreg (RISBGZ x {r})) && r.OutMerge(0xffffffff) != nil => (RISBGZ x {*r.OutMerge(0xffffffff)}) +(MOVHZreg (RISBGZ x {r})) && r.OutMerge(0x0000ffff) != nil => (RISBGZ x {*r.OutMerge(0x0000ffff)}) +(MOVBZreg (RISBGZ x {r})) && r.OutMerge(0x000000ff) != nil => (RISBGZ x {*r.OutMerge(0x000000ff)}) + +// Absorb shift into 'rotate then insert selected bits [into zero]'. +// +// Any unsigned shift can be represented as a rotate and mask operation: +// +// x << c => RotateLeft64(x, c) & (^uint64(0) << c) +// x >> c => RotateLeft64(x, -c) & (^uint64(0) >> c) +// +// Therefore when a shift is used as the input to a rotate then insert +// selected bits instruction we can merge the two together. We just have +// to be careful that the resultant mask is representable (non-zero and +// contiguous). For example, assuming that x is variable and c, y and m +// are constants, a shift followed by a rotate then insert selected bits +// could be represented as: +// +// RotateLeft64(RotateLeft64(x, c) & (^uint64(0) << c), y) & m +// +// We can split the rotation by y into two, one rotate for x and one for +// the mask: +// +// RotateLeft64(RotateLeft64(x, c), y) & (RotateLeft64(^uint64(0) << c, y)) & m +// +// The rotations of x by c followed by y can then be combined: +// +// RotateLeft64(x, c+y) & (RotateLeft64(^uint64(0) << c, y)) & m +// ^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// rotate mask +// +// To perform this optimization we therefore just need to check that it +// is valid to merge the shift mask (^(uint64(0)<<c)) into the selected +// bits mask (i.e. that the resultant mask is non-zero and contiguous). +(RISBGZ (SLDconst x [c]) {r}) && r.InMerge(^uint64(0)<<c) != nil => (RISBGZ x {(*r.InMerge(^uint64(0)<<c)).RotateLeft(c)}) +(RISBGZ (SRDconst x [c]) {r}) && r.InMerge(^uint64(0)>>c) != nil => (RISBGZ x {(*r.InMerge(^uint64(0)>>c)).RotateLeft(-c)}) + +// Absorb 'rotate then insert selected bits [into zero]' into left shift.
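The derivation in the comment above can be checked mechanically; this is pure integer math, independent of the compiler:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	x := uint64(0x0123456789ABCDEF)
	for c := uint(0); c < 64; c++ {
		left := bits.RotateLeft64(x, int(c)) & (^uint64(0) << c)
		right := bits.RotateLeft64(x, -int(c)) & (^uint64(0) >> c)
		if left != x<<c || right != x>>c {
			fmt.Println("identity fails at", c)
			return
		}
	}
	fmt.Println("shift == rotate+mask for every c in [0,64)")
}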
+(SLDconst (RISBGZ x {r}) [c]) + && s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask()) != nil + => (RISBGZ x {(*s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask())).RotateLeft(r.Amount)}) + +// Absorb 'rotate then insert selected bits [into zero]' into right shift. +(SRDconst (RISBGZ x {r}) [c]) + && s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask()) != nil + => (RISBGZ x {(*s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask())).RotateLeft(r.Amount)}) + +// Merge 'rotate then insert selected bits [into zero]' instructions together. +(RISBGZ (RISBGZ x {y}) {z}) + && z.InMerge(y.OutMask()) != nil + => (RISBGZ x {(*z.InMerge(y.OutMask())).RotateLeft(y.Amount)}) + +// Convert RISBGZ into 64-bit shift (helps CSE). +(RISBGZ x {r}) && r.End == 63 && r.Start == -r.Amount&63 => (SRDconst x [-r.Amount&63]) +(RISBGZ x {r}) && r.Start == 0 && r.End == 63-r.Amount => (SLDconst x [r.Amount]) + +// Optimize single bit isolation when it is known to be equivalent to +// the most significant bit due to mask produced by arithmetic shift. +// Simply isolate the most significant bit itself and place it in the +// correct position. +// +// Example: (int64(x) >> 63) & 0x8 -> RISBGZ $60, $60, $4, Rsrc, Rdst +(RISBGZ (SRADconst x [c]) {r}) + && r.Start == r.End // single bit selected + && (r.Start+r.Amount)&63 <= c // equivalent to most significant bit of x + => (RISBGZ x {s390x.NewRotateParams(r.Start, r.Start, -r.Start&63)}) + // Canonicalize the order of arguments to comparisons - helps with CSE. ((CMP|CMPW|CMPU|CMPWU) x y) && x.ID > y.ID => (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x)) -// Using MOV{W,H,B}Zreg instead of AND is cheaper. -(AND x (MOVDconst [0xFF])) => (MOVBZreg x) -(AND x (MOVDconst [0xFFFF])) => (MOVHZreg x) -(AND x (MOVDconst [0xFFFFFFFF])) => (MOVWZreg x) -(ANDWconst [0xFF] x) => (MOVBZreg x) -(ANDWconst [0xFFFF] x) => (MOVHZreg x) +// Use sign/zero extend instead of RISBGZ. +(RISBGZ x {r}) && r == s390x.NewRotateParams(56, 63, 0) => (MOVBZreg x) +(RISBGZ x {r}) && r == s390x.NewRotateParams(48, 63, 0) => (MOVHZreg x) +(RISBGZ x {r}) && r == s390x.NewRotateParams(32, 63, 0) => (MOVWZreg x) + +// Use sign/zero extend instead of ANDW. +(ANDWconst [0x00ff] x) => (MOVBZreg x) +(ANDWconst [0xffff] x) => (MOVHZreg x) // Strength reduce multiplication to the sum (or difference) of two powers of two. 
// @@ -773,21 +867,22 @@ // detect attempts to set/clear the sign bit // may need to be reworked when NIHH/OIHH are added -(SRDconst [1] (SLDconst [1] (LGDR x))) => (LGDR (LPDFR x)) -(LDGR (SRDconst [1] (SLDconst [1] x))) => (LPDFR (LDGR x)) -(AND (MOVDconst [^(-1<<63)]) (LGDR x)) => (LGDR (LPDFR x)) -(LDGR (AND (MOVDconst [^(-1<<63)]) x)) => (LPDFR (LDGR x)) -(OR (MOVDconst [-1<<63]) (LGDR x)) => (LGDR (LNDFR x)) -(LDGR (OR (MOVDconst [-1<<63]) x)) => (LNDFR (LDGR x)) +(RISBGZ (LGDR x) {r}) && r == s390x.NewRotateParams(1, 63, 0) => (LGDR (LPDFR x)) +(LDGR (RISBGZ x {r})) && r == s390x.NewRotateParams(1, 63, 0) => (LPDFR (LDGR x)) +(OR (MOVDconst [-1<<63]) (LGDR x)) => (LGDR (LNDFR x)) +(LDGR (OR (MOVDconst [-1<<63]) x)) => (LNDFR (LDGR x)) // detect attempts to set the sign bit with load (LDGR x:(ORload [off] {sym} (MOVDconst [-1<<63]) ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (LNDFR (LDGR (MOVDload [off] {sym} ptr mem))) // detect copysign -(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR y))) => (LGDR (CPSDR y x)) -(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 => (LGDR (CPSDR (FMOVDconst [math.Float64frombits(uint64(c))]) x)) -(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (LGDR (LPDFR y))) => (LGDR (CPSDR y x)) -(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) && c & -1<<63 == 0 => (LGDR (CPSDR (FMOVDconst [math.Float64frombits(uint64(c))]) x)) +(OR (RISBGZ (LGDR x) {r}) (LGDR (LPDFR y))) + && r == s390x.NewRotateParams(0, 0, 0) + => (LGDR (CPSDR y x)) +(OR (RISBGZ (LGDR x) {r}) (MOVDconst [c])) + && c >= 0 + && r == s390x.NewRotateParams(0, 0, 0) + => (LGDR (CPSDR (FMOVDconst [math.Float64frombits(uint64(c))]) x)) (CPSDR y (FMOVDconst [c])) && !math.Signbit(c) => (LPDFR y) (CPSDR y (FMOVDconst [c])) && math.Signbit(c) => (LNDFR y) @@ -966,6 +1061,9 @@ (CMPWconst (ANDWconst _ [m]) [n]) && int32(m) >= 0 && int32(m) < int32(n) => (FlagLT) (CMPWUconst (ANDWconst _ [m]) [n]) && uint32(m) < uint32(n) => (FlagLT) +(CMPconst (RISBGZ x {r}) [c]) && c > 0 && r.OutMask() < uint64(c) => (FlagLT) +(CMPUconst (RISBGZ x {r}) [c]) && r.OutMask() < uint64(uint32(c)) => (FlagLT) + // Constant compare-and-branch with immediate. 
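The copysign detection above matches the standard bit recipe: take the sign bit of one float (the RISBGZ {0,0,0} applied to its LGDR bits) and OR it into the magnitude of the other (LPDFR). A sketch using only the public math API; the operand mapping to CPSDR is one reading of the rule, so treat it as illustrative:

package main

import (
	"fmt"
	"math"
)

// copysign composes the result the way the rules pattern-match it:
// the sign bit of x ORed into the magnitude of y.
func copysign(y, x float64) float64 {
	const signBit = 1 << 63
	mag := math.Float64bits(y) &^ signBit // cf. LPDFR (load positive)
	sgn := math.Float64bits(x) & signBit  // cf. RISBGZ {0,0,0} of LGDR
	return math.Float64frombits(mag | sgn)
}

func main() {
	fmt.Println(copysign(3.5, -2.0))  // -3.5
	fmt.Println(copysign(-2.25, 1.0)) // 2.25
}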
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Equal != 0 && int64(x) == int64(y) => (First yes no) (CGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Less != 0 && int64(x) < int64(y) => (First yes no) diff --git a/src/cmd/compile/internal/ssa/gen/S390XOps.go b/src/cmd/compile/internal/ssa/gen/S390XOps.go index 728cfb5508..f0cf2f2f6e 100644 --- a/src/cmd/compile/internal/ssa/gen/S390XOps.go +++ b/src/cmd/compile/internal/ssa/gen/S390XOps.go @@ -331,25 +331,26 @@ func init() { {name: "LTEBR", argLength: 1, reg: fp1flags, asm: "LTEBR", typ: "Flags"}, // arg0 compare to 0, f32 {name: "SLD", argLength: 2, reg: sh21, asm: "SLD"}, // arg0 << arg1, shift amount is mod 64 - {name: "SLW", argLength: 2, reg: sh21, asm: "SLW"}, // arg0 << arg1, shift amount is mod 32 + {name: "SLW", argLength: 2, reg: sh21, asm: "SLW"}, // arg0 << arg1, shift amount is mod 64 {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int8"}, // arg0 << auxint, shift amount 0-63 {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int8"}, // arg0 << auxint, shift amount 0-31 {name: "SRD", argLength: 2, reg: sh21, asm: "SRD"}, // unsigned arg0 >> arg1, shift amount is mod 64 - {name: "SRW", argLength: 2, reg: sh21, asm: "SRW"}, // unsigned uint32(arg0) >> arg1, shift amount is mod 32 + {name: "SRW", argLength: 2, reg: sh21, asm: "SRW"}, // unsigned uint32(arg0) >> arg1, shift amount is mod 64 {name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int8"}, // unsigned arg0 >> auxint, shift amount 0-63 {name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int8"}, // unsigned uint32(arg0) >> auxint, shift amount 0-31 // Arithmetic shifts clobber flags. {name: "SRAD", argLength: 2, reg: sh21, asm: "SRAD", clobberFlags: true}, // signed arg0 >> arg1, shift amount is mod 64 - {name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true}, // signed int32(arg0) >> arg1, shift amount is mod 32 + {name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true}, // signed int32(arg0) >> arg1, shift amount is mod 64 {name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int8", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63 {name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int8", clobberFlags: true}, // signed int32(arg0) >> auxint, shift amount 0-31 - {name: "RLLG", argLength: 2, reg: sh21, asm: "RLLG"}, // arg0 rotate left arg1, rotate amount 0-63 - {name: "RLL", argLength: 2, reg: sh21, asm: "RLL"}, // arg0 rotate left arg1, rotate amount 0-31 - {name: "RLLGconst", argLength: 1, reg: gp11, asm: "RLLG", aux: "Int8"}, // arg0 rotate left auxint, rotate amount 0-63 - {name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "Int8"}, // arg0 rotate left auxint, rotate amount 0-31 + // Rotate instructions. + // Note: no RLLGconst - use RISBGZ instead. + {name: "RLLG", argLength: 2, reg: sh21, asm: "RLLG"}, // arg0 rotate left arg1, rotate amount 0-63 + {name: "RLL", argLength: 2, reg: sh21, asm: "RLL"}, // arg0 rotate left arg1, rotate amount 0-31 + {name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "Int8"}, // arg0 rotate left auxint, rotate amount 0-31 // Rotate then (and|or|xor|insert) selected bits instructions. 
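The "shift amount is mod 64" notes above are what make the earlier S(LW|RW) constant rules sound: the hardware does not reduce a 32-bit shift amount mod 32, so any constant amount with bit 5 set (c&32 != 0) shifts the whole word out and the result is zero. A sketch of that semantic (the slw helper is illustrative):

package main

import "fmt"

// slw models s390x SLW: the shift amount is reduced mod 64, not mod 32,
// so amounts 32..63 shift every bit out of the 32-bit operand.
func slw(x uint32, c uint) uint32 {
	c &= 63
	if c > 31 {
		return 0
	}
	return x << c
}

func main() {
	fmt.Println(slw(1, 33)) // 0; a mod-32 shifter would have produced 2
}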
// @@ -371,6 +372,7 @@ func init() { // +-------------+-------+-----+--------+-----------------------+-----------------------+-----------------------+ // {name: "RXSBG", argLength: 2, reg: gp21, asm: "RXSBG", resultInArg0: true, aux: "S390XRotateParams", clobberFlags: true}, // rotate then xor selected bits + {name: "RISBGZ", argLength: 1, reg: gp11, asm: "RISBGZ", aux: "S390XRotateParams", clobberFlags: true}, // rotate then insert selected bits [into zero] // unary ops {name: "NEG", argLength: 1, reg: gp11, asm: "NEG", clobberFlags: true}, // -arg0 @@ -547,9 +549,9 @@ func init() { // Atomic bitwise operations. // Note: 'floor' operations round the pointer down to the nearest word boundary // which reflects how they are used in the runtime. - {name: "LAN", argLength: 3, reg: gpstore, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 &= arg1. arg2 = mem. + {name: "LAN", argLength: 3, reg: gpstore, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 &= arg1. arg2 = mem. {name: "LANfloor", argLength: 3, reg: gpstorelab, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) &= arg1. arg2 = mem. - {name: "LAO", argLength: 3, reg: gpstore, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 |= arg1. arg2 = mem. + {name: "LAO", argLength: 3, reg: gpstore, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 |= arg1. arg2 = mem. {name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem. // Compare and swap. diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index c0b663cd8f..eceef1d91a 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2285,9 +2285,9 @@ const ( OpS390XSRAWconst OpS390XRLLG OpS390XRLL - OpS390XRLLGconst OpS390XRLLconst OpS390XRXSBG + OpS390XRISBGZ OpS390XNEG OpS390XNEGW OpS390XNOT @@ -30740,10 +30740,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "RLLGconst", + name: "RLLconst", auxType: auxInt8, argLen: 1, - asm: s390x.ARLLG, + asm: s390x.ARLL, reg: regInfo{ inputs: []inputInfo{ {0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 @@ -30754,13 +30754,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "RLLconst", - auxType: auxInt8, - argLen: 1, - asm: s390x.ARLL, + name: "RXSBG", + auxType: auxS390XRotateParams, + argLen: 2, + resultInArg0: true, + clobberFlags: true, + asm: s390x.ARXSBG, reg: regInfo{ inputs: []inputInfo{ {0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 + {1, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 }, outputs: []outputInfo{ {0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 @@ -30768,16 +30771,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "RXSBG", + name: "RISBGZ", auxType: auxS390XRotateParams, - argLen: 2, - resultInArg0: true, + argLen: 1, clobberFlags: true, - asm: s390x.ARXSBG, + asm: s390x.ARISBGZ, reg: regInfo{ inputs: []inputInfo{ {0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 - {1, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 }, outputs: []outputInfo{ {0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go index 8c3c61d584..d66113d111 100644 --- a/src/cmd/compile/internal/ssa/rewriteS390X.go +++ b/src/cmd/compile/internal/ssa/rewriteS390X.go @@ -699,6 +699,8 @@ func 
rewriteValueS390X(v *Value) bool { return rewriteValueS390X_OpS390XORconst(v) case OpS390XORload: return rewriteValueS390X_OpS390XORload(v) + case OpS390XRISBGZ: + return rewriteValueS390X_OpS390XRISBGZ(v) case OpS390XRLL: return rewriteValueS390X_OpS390XRLL(v) case OpS390XRLLG: @@ -5272,9 +5274,8 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool { } break } - // match: (ADD (SLDconst x [c]) (SRDconst x [d])) - // cond: d == 64-c - // result: (RLLGconst [c] x) + // match: (ADD (SLDconst x [c]) (SRDconst x [64-c])) + // result: (RISBGZ x {s390x.NewRotateParams(0, 63, c)}) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpS390XSLDconst { @@ -5282,15 +5283,11 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool { } c := auxIntToInt8(v_0.AuxInt) x := v_0.Args[0] - if v_1.Op != OpS390XSRDconst { + if v_1.Op != OpS390XSRDconst || auxIntToInt8(v_1.AuxInt) != 64-c || x != v_1.Args[0] { continue } - d := auxIntToInt8(v_1.AuxInt) - if x != v_1.Args[0] || !(d == 64-c) { - continue - } - v.reset(OpS390XRLLGconst) - v.AuxInt = int8ToAuxInt(c) + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(0, 63, c)) v.AddArg(x) return true } @@ -5470,9 +5467,8 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool { } break } - // match: (ADDW (SLWconst x [c]) (SRWconst x [d])) - // cond: d == 32-c - // result: (RLLconst [c] x) + // match: (ADDW (SLWconst x [c]) (SRWconst x [32-c])) + // result: (RLLconst x [c]) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpS390XSLWconst { @@ -5480,11 +5476,7 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool { } c := auxIntToInt8(v_0.AuxInt) x := v_0.Args[0] - if v_1.Op != OpS390XSRWconst { - continue - } - d := auxIntToInt8(v_1.AuxInt) - if x != v_1.Args[0] || !(d == 32-c) { + if v_1.Op != OpS390XSRWconst || auxIntToInt8(v_1.AuxInt) != 32-c || x != v_1.Args[0] { continue } v.reset(OpS390XRLLconst) @@ -5844,8 +5836,8 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (AND x (MOVDconst [c])) - // cond: is32Bit(c) && c < 0 - // result: (ANDconst [c] x) + // cond: s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c)) != nil + // result: (RISBGZ x {*s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c))}) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 @@ -5853,19 +5845,19 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool { continue } c := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(c) && c < 0) { + if !(s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c)) != nil) { continue } - v.reset(OpS390XANDconst) - v.AuxInt = int64ToAuxInt(c) + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c))) v.AddArg(x) return true } break } // match: (AND x (MOVDconst [c])) - // cond: is32Bit(c) && c >= 0 - // result: (MOVWZreg (ANDWconst [int32(c)] x)) + // cond: is32Bit(c) && c < 0 + // result: (ANDconst [c] x) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 @@ -5873,72 +5865,32 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool { continue } c := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(c) && c >= 0) { - continue - } - v.reset(OpS390XMOVWZreg) - v0 := b.NewValue0(v.Pos, OpS390XANDWconst, typ.UInt32) - v0.AuxInt = int32ToAuxInt(int32(c)) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break - } - // match: (AND x (MOVDconst [0xFF])) - // result: (MOVBZreg x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != 
OpS390XMOVDconst || auxIntToInt64(v_1.AuxInt) != 0xFF { + if !(is32Bit(c) && c < 0) { continue } - v.reset(OpS390XMOVBZreg) + v.reset(OpS390XANDconst) + v.AuxInt = int64ToAuxInt(c) v.AddArg(x) return true } break } - // match: (AND x (MOVDconst [0xFFFF])) - // result: (MOVHZreg x) + // match: (AND x (MOVDconst [c])) + // cond: is32Bit(c) && c >= 0 + // result: (MOVWZreg (ANDWconst [int32(c)] x)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 - if v_1.Op != OpS390XMOVDconst || auxIntToInt64(v_1.AuxInt) != 0xFFFF { + if v_1.Op != OpS390XMOVDconst { continue } - v.reset(OpS390XMOVHZreg) - v.AddArg(x) - return true - } - break - } - // match: (AND x (MOVDconst [0xFFFFFFFF])) - // result: (MOVWZreg x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpS390XMOVDconst || auxIntToInt64(v_1.AuxInt) != 0xFFFFFFFF { + c := auxIntToInt64(v_1.AuxInt) + if !(is32Bit(c) && c >= 0) { continue } v.reset(OpS390XMOVWZreg) - v.AddArg(x) - return true - } - break - } - // match: (AND (MOVDconst [^(-1<<63)]) (LGDR x)) - // result: (LGDR (LPDFR x)) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpS390XMOVDconst || auxIntToInt64(v_0.AuxInt) != ^(-1<<63) || v_1.Op != OpS390XLGDR { - continue - } - t := v_1.Type - x := v_1.Args[0] - v.reset(OpS390XLGDR) - v.Type = t - v0 := b.NewValue0(v.Pos, OpS390XLPDFR, x.Type) + v0 := b.NewValue0(v.Pos, OpS390XANDWconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(int32(c)) v0.AddArg(x) v.AddArg(v0) return true @@ -6103,10 +6055,10 @@ func rewriteValueS390X_OpS390XANDWconst(v *Value) bool { v.AddArg(x) return true } - // match: (ANDWconst [0xFF] x) + // match: (ANDWconst [0x00ff] x) // result: (MOVBZreg x) for { - if auxIntToInt32(v.AuxInt) != 0xFF { + if auxIntToInt32(v.AuxInt) != 0x00ff { break } x := v_0 @@ -6114,10 +6066,10 @@ func rewriteValueS390X_OpS390XANDWconst(v *Value) bool { v.AddArg(x) return true } - // match: (ANDWconst [0xFFFF] x) + // match: (ANDWconst [0xffff] x) // result: (MOVHZreg x) for { - if auxIntToInt32(v.AuxInt) != 0xFFFF { + if auxIntToInt32(v.AuxInt) != 0xffff { break } x := v_0 @@ -6515,6 +6467,21 @@ func rewriteValueS390X_OpS390XCMPUconst(v *Value) bool { v.reset(OpS390XFlagLT) return true } + // match: (CMPUconst (RISBGZ x {r}) [c]) + // cond: r.OutMask() < uint64(uint32(c)) + // result: (FlagLT) + for { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_0.Aux) + if !(r.OutMask() < uint64(uint32(c))) { + break + } + v.reset(OpS390XFlagLT) + return true + } // match: (CMPUconst (MOVWZreg x) [c]) // result: (CMPWUconst x [c]) for { @@ -7152,6 +7119,21 @@ func rewriteValueS390X_OpS390XCMPconst(v *Value) bool { v.reset(OpS390XFlagGT) return true } + // match: (CMPconst (RISBGZ x {r}) [c]) + // cond: c > 0 && r.OutMask() < uint64(c) + // result: (FlagLT) + for { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_0.Aux) + if !(c > 0 && r.OutMask() < uint64(c)) { + break + } + v.reset(OpS390XFlagLT) + return true + } // match: (CMPconst (MOVWreg x) [c]) // result: (CMPWconst x [c]) for { @@ -7684,47 +7666,25 @@ func rewriteValueS390X_OpS390XFNEGS(v *Value) bool { func rewriteValueS390X_OpS390XLDGR(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (LDGR (SRDconst [1] (SLDconst [1] x))) + // match: (LDGR (RISBGZ x {r})) + // cond: r == s390x.NewRotateParams(1, 63, 0) // result: (LPDFR (LDGR x)) for { t := v.Type - if v_0.Op != 
OpS390XSRDconst || auxIntToInt8(v_0.AuxInt) != 1 { + if v_0.Op != OpS390XRISBGZ { break } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpS390XSLDconst || auxIntToInt8(v_0_0.AuxInt) != 1 { + r := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + if !(r == s390x.NewRotateParams(1, 63, 0)) { break } - x := v_0_0.Args[0] v.reset(OpS390XLPDFR) v0 := b.NewValue0(v.Pos, OpS390XLDGR, t) v0.AddArg(x) v.AddArg(v0) return true } - // match: (LDGR (AND (MOVDconst [^(-1<<63)]) x)) - // result: (LPDFR (LDGR x)) - for { - t := v.Type - if v_0.Op != OpS390XAND { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpS390XMOVDconst || auxIntToInt64(v_0_0.AuxInt) != ^(-1<<63) { - continue - } - x := v_0_1 - v.reset(OpS390XLPDFR) - v0 := b.NewValue0(v.Pos, OpS390XLDGR, t) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break - } // match: (LDGR (OR (MOVDconst [-1<<63]) x)) // result: (LNDFR (LDGR x)) for { @@ -8309,6 +8269,23 @@ func rewriteValueS390X_OpS390XMOVBZreg(v *Value) bool { v.copyOf(x) return true } + // match: (MOVBZreg (RISBGZ x {r})) + // cond: r.OutMerge(0x000000ff) != nil + // result: (RISBGZ x {*r.OutMerge(0x000000ff)}) + for { + if v_0.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + if !(r.OutMerge(0x000000ff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.OutMerge(0x000000ff)) + v.AddArg(x) + return true + } // match: (MOVBZreg (ANDWconst [m] x)) // result: (MOVWZreg (ANDWconst [int32( uint8(m))] x)) for { @@ -9697,6 +9674,23 @@ func rewriteValueS390X_OpS390XMOVHZreg(v *Value) bool { v.AuxInt = int64ToAuxInt(int64(uint16(c))) return true } + // match: (MOVHZreg (RISBGZ x {r})) + // cond: r.OutMerge(0x0000ffff) != nil + // result: (RISBGZ x {*r.OutMerge(0x0000ffff)}) + for { + if v_0.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + if !(r.OutMerge(0x0000ffff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.OutMerge(0x0000ffff)) + v.AddArg(x) + return true + } // match: (MOVHZreg (ANDWconst [m] x)) // result: (MOVWZreg (ANDWconst [int32(uint16(m))] x)) for { @@ -10547,6 +10541,23 @@ func rewriteValueS390X_OpS390XMOVWZreg(v *Value) bool { v.AuxInt = int64ToAuxInt(int64(uint32(c))) return true } + // match: (MOVWZreg (RISBGZ x {r})) + // cond: r.OutMerge(0xffffffff) != nil + // result: (RISBGZ x {*r.OutMerge(0xffffffff)}) + for { + if v_0.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + if !(r.OutMerge(0xffffffff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.OutMerge(0xffffffff)) + v.AddArg(x) + return true + } return false } func rewriteValueS390X_OpS390XMOVWload(v *Value) bool { @@ -11622,9 +11633,8 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } break } - // match: ( OR (SLDconst x [c]) (SRDconst x [d])) - // cond: d == 64-c - // result: (RLLGconst [c] x) + // match: (OR (SLDconst x [c]) (SRDconst x [64-c])) + // result: (RISBGZ x {s390x.NewRotateParams(0, 63, c)}) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpS390XSLDconst { @@ -11632,15 +11642,11 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } c := auxIntToInt8(v_0.AuxInt) x := v_0.Args[0] - if v_1.Op != OpS390XSRDconst { - continue - } - d := auxIntToInt8(v_1.AuxInt) - if x != v_1.Args[0] || !(d == 64-c) { + if v_1.Op != OpS390XSRDconst || 
auxIntToInt8(v_1.AuxInt) != 64-c || x != v_1.Args[0] { continue } - v.reset(OpS390XRLLGconst) - v.AuxInt = int8ToAuxInt(c) + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(0, 63, c)) v.AddArg(x) return true } @@ -11664,22 +11670,20 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } break } - // match: (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR y))) + // match: (OR (RISBGZ (LGDR x) {r}) (LGDR (LPDFR y))) + // cond: r == s390x.NewRotateParams(0, 0, 0) // result: (LGDR (CPSDR y x)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpS390XSLDconst || auxIntToInt8(v_0.AuxInt) != 63 { + if v_0.Op != OpS390XRISBGZ { continue } + r := auxToS390xRotateParams(v_0.Aux) v_0_0 := v_0.Args[0] - if v_0_0.Op != OpS390XSRDconst || auxIntToInt8(v_0_0.AuxInt) != 63 { - continue - } - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpS390XLGDR { + if v_0_0.Op != OpS390XLGDR { continue } - x := v_0_0_0.Args[0] + x := v_0_0.Args[0] if v_1.Op != OpS390XLGDR { continue } @@ -11689,6 +11693,9 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } t := v_1_0.Type y := v_1_0.Args[0] + if !(r == s390x.NewRotateParams(0, 0, 0)) { + continue + } v.reset(OpS390XLGDR) v0 := b.NewValue0(v.Pos, OpS390XCPSDR, t) v0.AddArg2(y, x) @@ -11697,28 +11704,25 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } break } - // match: (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) - // cond: c & -1<<63 == 0 + // match: (OR (RISBGZ (LGDR x) {r}) (MOVDconst [c])) + // cond: c >= 0 && r == s390x.NewRotateParams(0, 0, 0) // result: (LGDR (CPSDR (FMOVDconst [math.Float64frombits(uint64(c))]) x)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpS390XSLDconst || auxIntToInt8(v_0.AuxInt) != 63 { + if v_0.Op != OpS390XRISBGZ { continue } + r := auxToS390xRotateParams(v_0.Aux) v_0_0 := v_0.Args[0] - if v_0_0.Op != OpS390XSRDconst || auxIntToInt8(v_0_0.AuxInt) != 63 { + if v_0_0.Op != OpS390XLGDR { continue } - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpS390XLGDR { - continue - } - x := v_0_0_0.Args[0] + x := v_0_0.Args[0] if v_1.Op != OpS390XMOVDconst { continue } c := auxIntToInt64(v_1.AuxInt) - if !(c&-1<<63 == 0) { + if !(c >= 0 && r == s390x.NewRotateParams(0, 0, 0)) { continue } v.reset(OpS390XLGDR) @@ -11731,73 +11735,6 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } break } - // match: (OR (AND (MOVDconst [-1<<63]) (LGDR x)) (LGDR (LPDFR y))) - // result: (LGDR (CPSDR y x)) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpS390XAND { - continue - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { - if v_0_0.Op != OpS390XMOVDconst || auxIntToInt64(v_0_0.AuxInt) != -1<<63 || v_0_1.Op != OpS390XLGDR { - continue - } - x := v_0_1.Args[0] - if v_1.Op != OpS390XLGDR { - continue - } - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpS390XLPDFR { - continue - } - t := v_1_0.Type - y := v_1_0.Args[0] - v.reset(OpS390XLGDR) - v0 := b.NewValue0(v.Pos, OpS390XCPSDR, t) - v0.AddArg2(y, x) - v.AddArg(v0) - return true - } - } - break - } - // match: (OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) - // cond: c & -1<<63 == 0 - // result: (LGDR (CPSDR (FMOVDconst [math.Float64frombits(uint64(c))]) x)) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpS390XAND { - continue - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, 
v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { - if v_0_0.Op != OpS390XMOVDconst || auxIntToInt64(v_0_0.AuxInt) != -1<<63 || v_0_1.Op != OpS390XLGDR { - continue - } - x := v_0_1.Args[0] - if v_1.Op != OpS390XMOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(c&-1<<63 == 0) { - continue - } - v.reset(OpS390XLGDR) - v0 := b.NewValue0(v.Pos, OpS390XCPSDR, x.Type) - v1 := b.NewValue0(v.Pos, OpS390XFMOVDconst, x.Type) - v1.AuxInt = float64ToAuxInt(math.Float64frombits(uint64(c))) - v0.AddArg2(v1, x) - v.AddArg(v0) - return true - } - } - break - } // match: (OR (MOVDconst [c]) (MOVDconst [d])) // result: (MOVDconst [c|d]) for { @@ -12394,9 +12331,8 @@ func rewriteValueS390X_OpS390XORW(v *Value) bool { } break } - // match: ( ORW (SLWconst x [c]) (SRWconst x [d])) - // cond: d == 32-c - // result: (RLLconst [c] x) + // match: (ORW (SLWconst x [c]) (SRWconst x [32-c])) + // result: (RLLconst x [c]) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpS390XSLWconst { @@ -12404,11 +12340,7 @@ c := auxIntToInt8(v_0.AuxInt) x := v_0.Args[0] - if v_1.Op != OpS390XSRWconst { - continue - } - d := auxIntToInt8(v_1.AuxInt) - if x != v_1.Args[0] || !(d == 32-c) { + if v_1.Op != OpS390XSRWconst || auxIntToInt8(v_1.AuxInt) != 32-c || x != v_1.Args[0] { continue } v.reset(OpS390XRLLconst) @@ -12980,6 +12912,221 @@ func rewriteValueS390X_OpS390XORload(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XRISBGZ(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (RISBGZ (MOVWZreg x) {r}) + // cond: r.InMerge(0xffffffff) != nil + // result: (RISBGZ x {*r.InMerge(0xffffffff)}) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XMOVWZreg { + break + } + x := v_0.Args[0] + if !(r.InMerge(0xffffffff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.InMerge(0xffffffff)) + v.AddArg(x) + return true + } + // match: (RISBGZ (MOVHZreg x) {r}) + // cond: r.InMerge(0x0000ffff) != nil + // result: (RISBGZ x {*r.InMerge(0x0000ffff)}) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XMOVHZreg { + break + } + x := v_0.Args[0] + if !(r.InMerge(0x0000ffff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.InMerge(0x0000ffff)) + v.AddArg(x) + return true + } + // match: (RISBGZ (MOVBZreg x) {r}) + // cond: r.InMerge(0x000000ff) != nil + // result: (RISBGZ x {*r.InMerge(0x000000ff)}) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XMOVBZreg { + break + } + x := v_0.Args[0] + if !(r.InMerge(0x000000ff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.InMerge(0x000000ff)) + v.AddArg(x) + return true + } + // match: (RISBGZ (SLDconst x [c]) {r}) + // cond: r.InMerge(^uint64(0)<<c) != nil + // result: (RISBGZ x {(*r.InMerge(^uint64(0)<<c)).RotateLeft(c)}) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XSLDconst { + break + } + c := auxIntToInt8(v_0.AuxInt) + x := v_0.Args[0] + if !(r.InMerge(^uint64(0)<<c) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux((*r.InMerge(^uint64(0) << c)).RotateLeft(c)) + v.AddArg(x) + return true + } + // match: (RISBGZ (SRDconst x [c]) {r}) + // cond: r.InMerge(^uint64(0)>>c) != nil + // result: (RISBGZ x {(*r.InMerge(^uint64(0)>>c)).RotateLeft(-c)}) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XSRDconst { + break + } + c := auxIntToInt8(v_0.AuxInt) + x := v_0.Args[0] + if !(r.InMerge(^uint64(0)>>c) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux((*r.InMerge(^uint64(0) >> c)).RotateLeft(-c)) + v.AddArg(x) + return true + } + // match: (RISBGZ (RISBGZ x {y}) {z}) + // cond: z.InMerge(y.OutMask()) != nil + // result: (RISBGZ x {(*z.InMerge(y.OutMask())).RotateLeft(y.Amount)}) + for { + z := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XRISBGZ { + break + } + y := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + 
if !(z.InMerge(y.OutMask()) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux((*z.InMerge(y.OutMask())).RotateLeft(y.Amount)) + v.AddArg(x) + return true + } + // match: (RISBGZ x {r}) + // cond: r.End == 63 && r.Start == -r.Amount&63 + // result: (SRDconst x [-r.Amount&63]) + for { + r := auxToS390xRotateParams(v.Aux) + x := v_0 + if !(r.End == 63 && r.Start == -r.Amount&63) { + break + } + v.reset(OpS390XSRDconst) + v.AuxInt = int8ToAuxInt(-r.Amount & 63) + v.AddArg(x) + return true + } + // match: (RISBGZ x {r}) + // cond: r.Start == 0 && r.End == 63-r.Amount + // result: (SLDconst x [r.Amount]) + for { + r := auxToS390xRotateParams(v.Aux) + x := v_0 + if !(r.Start == 0 && r.End == 63-r.Amount) { + break + } + v.reset(OpS390XSLDconst) + v.AuxInt = int8ToAuxInt(r.Amount) + v.AddArg(x) + return true + } + // match: (RISBGZ (SRADconst x [c]) {r}) + // cond: r.Start == r.End && (r.Start+r.Amount)&63 <= c + // result: (RISBGZ x {s390x.NewRotateParams(r.Start, r.Start, -r.Start&63)}) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XSRADconst { + break + } + c := auxIntToInt8(v_0.AuxInt) + x := v_0.Args[0] + if !(r.Start == r.End && (r.Start+r.Amount)&63 <= c) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(r.Start, r.Start, -r.Start&63)) + v.AddArg(x) + return true + } + // match: (RISBGZ x {r}) + // cond: r == s390x.NewRotateParams(56, 63, 0) + // result: (MOVBZreg x) + for { + r := auxToS390xRotateParams(v.Aux) + x := v_0 + if !(r == s390x.NewRotateParams(56, 63, 0)) { + break + } + v.reset(OpS390XMOVBZreg) + v.AddArg(x) + return true + } + // match: (RISBGZ x {r}) + // cond: r == s390x.NewRotateParams(48, 63, 0) + // result: (MOVHZreg x) + for { + r := auxToS390xRotateParams(v.Aux) + x := v_0 + if !(r == s390x.NewRotateParams(48, 63, 0)) { + break + } + v.reset(OpS390XMOVHZreg) + v.AddArg(x) + return true + } + // match: (RISBGZ x {r}) + // cond: r == s390x.NewRotateParams(32, 63, 0) + // result: (MOVWZreg x) + for { + r := auxToS390xRotateParams(v.Aux) + x := v_0 + if !(r == s390x.NewRotateParams(32, 63, 0)) { + break + } + v.reset(OpS390XMOVWZreg) + v.AddArg(x) + return true + } + // match: (RISBGZ (LGDR x) {r}) + // cond: r == s390x.NewRotateParams(1, 63, 0) + // result: (LGDR (LPDFR x)) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XLGDR { + break + } + t := v_0.Type + x := v_0.Args[0] + if !(r == s390x.NewRotateParams(1, 63, 0)) { + break + } + v.reset(OpS390XLGDR) + v.Type = t + v0 := b.NewValue0(v.Pos, OpS390XLPDFR, x.Type) + v0.AddArg(x) + v.AddArg(v0) + return true + } + return false +} func rewriteValueS390X_OpS390XRLL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -13002,15 +13149,15 @@ func rewriteValueS390X_OpS390XRLLG(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (RLLG x (MOVDconst [c])) - // result: (RLLGconst x [int8(c&63)]) + // result: (RISBGZ x {s390x.NewRotateParams(0, 63, int8(c&63))}) for { x := v_0 if v_1.Op != OpS390XMOVDconst { break } c := auxIntToInt64(v_1.AuxInt) - v.reset(OpS390XRLLGconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(0, 63, int8(c&63))) v.AddArg(x) return true } @@ -13034,6 +13181,23 @@ func rewriteValueS390X_OpS390XSLD(v *Value) bool { v.AddArg(x) return true } + // match: (SLD x (RISBGZ y {r})) + // cond: r.Amount == 0 && r.OutMask()&63 == 63 + // result: (SLD x y) + for { + x := v_0 + if v_1.Op != OpS390XRISBGZ { + break + } + r := 
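The RISBGZ-to-shift conversions generated above (r.End == 63 with Start == -Amount&63, and Start == 0 with End == 63-Amount) are exact equivalences, which is what makes them safe to emit for CSE. A property check against the same illustrative risbgz model used earlier, restated here so the snippet stands alone (wrapping masks omitted, since these cases never need them):

package main

import (
	"fmt"
	"math/bits"
)

// risbgz restated for a standalone check (non-wrapping masks only):
// rotate x left by amount, then keep bits start..end (bit 0 = MSB).
func risbgz(x uint64, start, end, amount uint) uint64 {
	m := ^uint64(0)>>start & ^uint64(0)<<(63-end)
	return bits.RotateLeft64(x, int(amount)) & m
}

func main() {
	x := uint64(0xFEDCBA9876543210)
	for s := uint(1); s < 64; s++ {
		if risbgz(x, s, 63, 64-s) != x>>s { // End==63, Start==-Amount&63
			fmt.Println("SRD case fails at", s)
			return
		}
		if risbgz(x, 0, 63-s, s) != x<<s { // Start==0, End==63-Amount
			fmt.Println("SLD case fails at", s)
			return
		}
	}
	fmt.Println("RISBGZ <-> shift equivalences hold")
}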
+		y := v_1.Args[0]
+		if !(r.Amount == 0 && r.OutMask()&63 == 63) {
+			break
+		}
+		v.reset(OpS390XSLD)
+		v.AddArg2(x, y)
+		return true
+	}
 	// match: (SLD x (AND (MOVDconst [c]) y))
 	// result: (SLD x (ANDWconst <typ.UInt32> [int32(c&63)] y))
 	for {
@@ -13152,6 +13316,38 @@
 }
 func rewriteValueS390X_OpS390XSLDconst(v *Value) bool {
 	v_0 := v.Args[0]
+	// match: (SLDconst (SRDconst x [c]) [d])
+	// result: (RISBGZ x {s390x.NewRotateParams(max8(0, c-d), 63-d, (d-c)&63)})
+	for {
+		d := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpS390XSRDconst {
+			break
+		}
+		c := auxIntToInt8(v_0.AuxInt)
+		x := v_0.Args[0]
+		v.reset(OpS390XRISBGZ)
+		v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(max8(0, c-d), 63-d, (d-c)&63))
+		v.AddArg(x)
+		return true
+	}
+	// match: (SLDconst (RISBGZ x {r}) [c])
+	// cond: s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask()) != nil
+	// result: (RISBGZ x {(*s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask())).RotateLeft(r.Amount)})
+	for {
+		c := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpS390XRISBGZ {
+			break
+		}
+		r := auxToS390xRotateParams(v_0.Aux)
+		x := v_0.Args[0]
+		if !(s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask()) != nil) {
+			break
+		}
+		v.reset(OpS390XRISBGZ)
+		v.Aux = s390xRotateParamsToAux((*s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask())).RotateLeft(r.Amount))
+		v.AddArg(x)
+		return true
+	}
 	// match: (SLDconst x [0])
 	// result: x
 	for {
@@ -13170,18 +13366,54 @@ func rewriteValueS390X_OpS390XSLW(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (SLW x (MOVDconst [c]))
-	// result: (SLWconst x [int8(c&63)])
+	// cond: c&32 == 0
+	// result: (SLWconst x [int8(c&31)])
 	for {
 		x := v_0
 		if v_1.Op != OpS390XMOVDconst {
 			break
 		}
 		c := auxIntToInt64(v_1.AuxInt)
+		if !(c&32 == 0) {
+			break
+		}
 		v.reset(OpS390XSLWconst)
-		v.AuxInt = int8ToAuxInt(int8(c & 63))
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
+	// match: (SLW _ (MOVDconst [c]))
+	// cond: c&32 != 0
+	// result: (MOVDconst [0])
+	for {
+		if v_1.Op != OpS390XMOVDconst {
+			break
+		}
+		c := auxIntToInt64(v_1.AuxInt)
+		if !(c&32 != 0) {
+			break
+		}
+		v.reset(OpS390XMOVDconst)
+		v.AuxInt = int64ToAuxInt(0)
+		return true
+	}
+	// match: (SLW x (RISBGZ y {r}))
+	// cond: r.Amount == 0 && r.OutMask()&63 == 63
+	// result: (SLW x y)
+	for {
+		x := v_0
+		if v_1.Op != OpS390XRISBGZ {
+			break
+		}
+		r := auxToS390xRotateParams(v_1.Aux)
+		y := v_1.Args[0]
+		if !(r.Amount == 0 && r.OutMask()&63 == 63) {
+			break
+		}
+		v.reset(OpS390XSLW)
+		v.AddArg2(x, y)
+		return true
+	}
 	// match: (SLW x (AND (MOVDconst [c]) y))
 	// result: (SLW x (ANDWconst <typ.UInt32> [int32(c&63)] y))
 	for {
@@ -13330,6 +13562,23 @@ func rewriteValueS390X_OpS390XSRAD(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (SRAD x (RISBGZ y {r}))
+	// cond: r.Amount == 0 && r.OutMask()&63 == 63
+	// result: (SRAD x y)
+	for {
+		x := v_0
+		if v_1.Op != OpS390XRISBGZ {
+			break
+		}
+		r := auxToS390xRotateParams(v_1.Aux)
+		y := v_1.Args[0]
+		if !(r.Amount == 0 && r.OutMask()&63 == 63) {
+			break
+		}
+		v.reset(OpS390XSRAD)
+		v.AddArg2(x, y)
+		return true
+	}
 	// match: (SRAD x (AND (MOVDconst [c]) y))
 	// result: (SRAD x (ANDWconst <typ.UInt32> [int32(c&63)] y))
 	for {
@@ -13478,18 +13727,56 @@ func rewriteValueS390X_OpS390XSRAW(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (SRAW x (MOVDconst [c]))
-	// result: (SRAWconst x [int8(c&63)])
+	// cond: c&32 == 0
+	// result: (SRAWconst x [int8(c&31)])
 	for {
 		x := v_0
 		if v_1.Op != OpS390XMOVDconst {
 			break
 		}
 		c := auxIntToInt64(v_1.AuxInt)
+		if !(c&32 == 0) {
+			break
+		}
 		v.reset(OpS390XSRAWconst)
-		v.AuxInt = int8ToAuxInt(int8(c & 63))
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
+	// match: (SRAW x (MOVDconst [c]))
+	// cond: c&32 != 0
+	// result: (SRAWconst x [31])
+	for {
+		x := v_0
+		if v_1.Op != OpS390XMOVDconst {
+			break
+		}
+		c := auxIntToInt64(v_1.AuxInt)
+		if !(c&32 != 0) {
+			break
+		}
+		v.reset(OpS390XSRAWconst)
+		v.AuxInt = int8ToAuxInt(31)
+		v.AddArg(x)
+		return true
+	}
+	// match: (SRAW x (RISBGZ y {r}))
+	// cond: r.Amount == 0 && r.OutMask()&63 == 63
+	// result: (SRAW x y)
+	for {
+		x := v_0
+		if v_1.Op != OpS390XRISBGZ {
+			break
+		}
+		r := auxToS390xRotateParams(v_1.Aux)
+		y := v_1.Args[0]
+		if !(r.Amount == 0 && r.OutMask()&63 == 63) {
+			break
+		}
+		v.reset(OpS390XSRAW)
+		v.AddArg2(x, y)
+		return true
+	}
 	// match: (SRAW x (AND (MOVDconst [c]) y))
 	// result: (SRAW x (ANDWconst <typ.UInt32> [int32(c&63)] y))
 	for {
@@ -13650,6 +13937,23 @@ func rewriteValueS390X_OpS390XSRD(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (SRD x (RISBGZ y {r}))
+	// cond: r.Amount == 0 && r.OutMask()&63 == 63
+	// result: (SRD x y)
+	for {
+		x := v_0
+		if v_1.Op != OpS390XRISBGZ {
+			break
+		}
+		r := auxToS390xRotateParams(v_1.Aux)
+		y := v_1.Args[0]
+		if !(r.Amount == 0 && r.OutMask()&63 == 63) {
+			break
+		}
+		v.reset(OpS390XSRD)
+		v.AddArg2(x, y)
+		return true
+	}
 	// match: (SRD x (AND (MOVDconst [c]) y))
 	// result: (SRD x (ANDWconst <typ.UInt32> [int32(c&63)] y))
 	for {
@@ -13768,24 +14072,36 @@
 }
 func rewriteValueS390X_OpS390XSRDconst(v *Value) bool {
 	v_0 := v.Args[0]
-	b := v.Block
-	// match: (SRDconst [1] (SLDconst [1] (LGDR <t> x)))
-	// result: (LGDR <t> (LPDFR <x.Type> x))
+	// match: (SRDconst (SLDconst x [c]) [d])
+	// result: (RISBGZ x {s390x.NewRotateParams(d, min8(63, 63-c+d), (c-d)&63)})
 	for {
-		if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpS390XSLDconst || auxIntToInt8(v_0.AuxInt) != 1 {
+		d := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpS390XSLDconst {
 			break
 		}
-		v_0_0 := v_0.Args[0]
-		if v_0_0.Op != OpS390XLGDR {
+		c := auxIntToInt8(v_0.AuxInt)
+		x := v_0.Args[0]
+		v.reset(OpS390XRISBGZ)
+		v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(d, min8(63, 63-c+d), (c-d)&63))
+		v.AddArg(x)
+		return true
+	}
+	// match: (SRDconst (RISBGZ x {r}) [c])
+	// cond: s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask()) != nil
+	// result: (RISBGZ x {(*s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask())).RotateLeft(r.Amount)})
+	for {
+		c := auxIntToInt8(v.AuxInt)
+		if v_0.Op != OpS390XRISBGZ {
 			break
 		}
-		t := v_0_0.Type
-		x := v_0_0.Args[0]
-		v.reset(OpS390XLGDR)
-		v.Type = t
-		v0 := b.NewValue0(v.Pos, OpS390XLPDFR, x.Type)
-		v0.AddArg(x)
-		v.AddArg(v0)
+		r := auxToS390xRotateParams(v_0.Aux)
+		x := v_0.Args[0]
+		if !(s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask()) != nil) {
+			break
+		}
+		v.reset(OpS390XRISBGZ)
+		v.Aux = s390xRotateParamsToAux((*s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask())).RotateLeft(r.Amount))
+		v.AddArg(x)
 		return true
 	}
 	// match: (SRDconst x [0])
@@ -13806,18 +14122,54 @@ func rewriteValueS390X_OpS390XSRW(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (SRW x (MOVDconst [c]))
-	// result: (SRWconst x [int8(c&63)])
+	// cond: c&32 == 0
+	// result: (SRWconst x [int8(c&31)])
 	for {
 		x := v_0
 		if v_1.Op != OpS390XMOVDconst {
 			break
 		}
 		c := auxIntToInt64(v_1.AuxInt)
+		if !(c&32 == 0) {
+			break
+		}
 		v.reset(OpS390XSRWconst)
-		v.AuxInt = int8ToAuxInt(int8(c & 63))
+		v.AuxInt = int8ToAuxInt(int8(c & 31))
 		v.AddArg(x)
 		return true
 	}
+	// match: (SRW _ (MOVDconst [c]))
+	// cond: c&32 != 0
+	// result: (MOVDconst [0])
+	for {
+		if v_1.Op != OpS390XMOVDconst {
+			break
+		}
+		c := auxIntToInt64(v_1.AuxInt)
+		if !(c&32 != 0) {
+			break
+		}
+		v.reset(OpS390XMOVDconst)
+		v.AuxInt = int64ToAuxInt(0)
+		return true
+	}
+	// match: (SRW x (RISBGZ y {r}))
+	// cond: r.Amount == 0 && r.OutMask()&63 == 63
+	// result: (SRW x y)
+	for {
+		x := v_0
+		if v_1.Op != OpS390XRISBGZ {
+			break
+		}
+		r := auxToS390xRotateParams(v_1.Aux)
+		y := v_1.Args[0]
+		if !(r.Amount == 0 && r.OutMask()&63 == 63) {
+			break
+		}
+		v.reset(OpS390XSRW)
+		v.AddArg2(x, y)
+		return true
+	}
 	// match: (SRW x (AND (MOVDconst [c]) y))
 	// result: (SRW x (ANDWconst <typ.UInt32> [int32(c&63)] y))
 	for {
@@ -14564,9 +14916,8 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool {
 		}
 		break
 	}
-	// match: (XOR (SLDconst x [c]) (SRDconst x [d]))
-	// cond: d == 64-c
-	// result: (RLLGconst [c] x)
+	// match: (XOR (SLDconst x [c]) (SRDconst x [64-c]))
+	// result: (RISBGZ x {s390x.NewRotateParams(0, 63, c)})
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			if v_0.Op != OpS390XSLDconst {
@@ -14574,15 +14925,11 @@
 			}
 			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
-			if v_1.Op != OpS390XSRDconst {
-				continue
-			}
-			d := auxIntToInt8(v_1.AuxInt)
-			if x != v_1.Args[0] || !(d == 64-c) {
+			if v_1.Op != OpS390XSRDconst || auxIntToInt8(v_1.AuxInt) != 64-c || x != v_1.Args[0] {
 				continue
 			}
-			v.reset(OpS390XRLLGconst)
-			v.AuxInt = int8ToAuxInt(c)
+			v.reset(OpS390XRISBGZ)
+			v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(0, 63, c))
 			v.AddArg(x)
 			return true
 		}
@@ -14665,9 +15012,8 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool {
 		}
 		break
 	}
-	// match: (XORW (SLWconst x [c]) (SRWconst x [d]))
-	// cond: d == 32-c
-	// result: (RLLconst [c] x)
+	// match: (XORW (SLWconst x [c]) (SRWconst x [32-c]))
+	// result: (RLLconst x [c])
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			if v_0.Op != OpS390XSLWconst {
@@ -14675,11 +15021,7 @@
 			}
 			c := auxIntToInt8(v_0.AuxInt)
 			x := v_0.Args[0]
-			if v_1.Op != OpS390XSRWconst {
-				continue
-			}
-			d := auxIntToInt8(v_1.AuxInt)
-			if x != v_1.Args[0] || !(d == 32-c) {
+			if v_1.Op != OpS390XSRWconst || auxIntToInt8(v_1.AuxInt) != 32-c || x != v_1.Args[0] {
 				continue
 			}
 			v.reset(OpS390XRLLconst)
diff --git a/src/cmd/internal/obj/s390x/rotate.go b/src/cmd/internal/obj/s390x/rotate.go
index fd2d5482db..7dbc45e648 100644
--- a/src/cmd/internal/obj/s390x/rotate.go
+++ b/src/cmd/internal/obj/s390x/rotate.go
@@ -4,6 +4,10 @@
 
 package s390x
 
+import (
+	"math/bits"
+)
+
 // RotateParams represents the immediates required for a "rotate
 // then ... selected bits instruction".
 //
@@ -24,12 +28,18 @@ package s390x
 // input left by. Note that this rotation is performed
 // before the masked region is used.
 type RotateParams struct {
-	Start  uint8 // big-endian start bit index [0..63]
-	End    uint8 // big-endian end bit index [0..63]
-	Amount uint8 // amount to rotate left
+	Start  int8 // big-endian start bit index [0..63]
+	End    int8 // big-endian end bit index [0..63]
+	Amount int8 // amount to rotate left
 }
 
-func NewRotateParams(start, end, amount int64) RotateParams {
+// NewRotateParams creates a set of parameters representing a
+// rotation left by the amount provided and a selection of the bits
+// between the provided start and end indexes (inclusive).
+//
+// The start and end indexes and the rotation amount must all
+// be in the range 0-63 inclusive or this function will panic.
+func NewRotateParams(start, end, amount int8) RotateParams {
 	if start&^63 != 0 {
 		panic("start out of bounds")
 	}
@@ -40,8 +50,66 @@ func NewRotateParams(start, end, amount int64) RotateParams {
 		panic("amount out of bounds")
 	}
 	return RotateParams{
-		Start:  uint8(start),
-		End:    uint8(end),
-		Amount: uint8(amount),
+		Start:  start,
+		End:    end,
+		Amount: amount,
 	}
 }
+
+// RotateLeft generates a new set of parameters with the rotation amount
+// increased by the given value. The selected bits are left unchanged.
+func (r RotateParams) RotateLeft(amount int8) RotateParams {
+	r.Amount += amount
+	r.Amount &= 63
+	return r
+}
+
+// OutMask provides a mask representing the selected bits.
+func (r RotateParams) OutMask() uint64 {
+	// Note: z must be unsigned for bootstrap compiler
+	z := uint8(63-r.End+r.Start) & 63 // number of zero bits in mask
+	return bits.RotateLeft64(^uint64(0)<<z, -int(r.Start))
+}
+
+// InMask provides a mask representing the selected bits relative
+// to the source of the rotated value (i.e. pre-rotation).
+func (r RotateParams) InMask() uint64 {
+	return bits.RotateLeft64(r.OutMask(), -int(r.Amount))
+}
+
+// OutMerge tries to generate a new set of parameters representing
+// the intersection of the selected bits and the provided mask. If
+// the intersection is unrepresentable (0 or not contiguous) nil
+// is returned.
+func (r RotateParams) OutMerge(mask uint64) *RotateParams {
+	mask &= r.OutMask()
+	if mask == 0 {
+		return nil
+	}
+
+	// normalize the mask so that it is just a series of
+	// leading ones.
+	o := bits.LeadingZeros64(^mask)
+	mask = bits.RotateLeft64(mask, o)
+	z := bits.LeadingZeros64(mask)
+	mask = bits.RotateLeft64(mask, z)
+
+	// check that the normalized mask is contiguous
+	l := bits.LeadingZeros64(^mask)
+	if l+bits.TrailingZeros64(mask) != 64 {
+		return nil
+	}
+
+	// update start and end positions (rotation amount remains the same)
+	r.Start = int8(o+z) & 63
+	r.End = (r.Start + int8(l) - 1) & 63
+	return &r
+}
+
+// InMerge tries to generate a new set of parameters representing
+// the intersection of the selected bits and the provided mask as
+// applied to the source value. If the intersection is
+// unrepresentable (0 or not contiguous) nil is returned.
+func (r RotateParams) InMerge(mask uint64) *RotateParams {
+	return r.OutMerge(bits.RotateLeft64(mask, int(r.Amount)))
+}
diff --git a/src/cmd/internal/obj/s390x/rotate_test.go b/src/cmd/internal/obj/s390x/rotate_test.go
new file mode 100644
--- /dev/null
+++ b/src/cmd/internal/obj/s390x/rotate_test.go
@@ -0,0 +1,122 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package s390x
+
+import (
+	"testing"
+)
+
+func TestRotateParamsMask(t *testing.T) {
+	tests := []struct {
+		start, end, amount int8
+		inMask, outMask    uint64
+	}{
+		// start before end, no rotation
+		{start: 0, end: 63, amount: 0, inMask: ^uint64(0), outMask: ^uint64(0)},
+		{start: 1, end: 63, amount: 0, inMask: ^uint64(0) >> 1, outMask: ^uint64(0) >> 1},
+		{start: 0, end: 62, amount: 0, inMask: ^uint64(1), outMask: ^uint64(1)},
+		{start: 1, end: 62, amount: 0, inMask: ^uint64(3) >> 1, outMask: ^uint64(3) >> 1},
+
+		// end before start, no rotation
+		{start: 63, end: 0, amount: 0, inMask: 1<<63 | 1, outMask: 1<<63 | 1},
+		{start: 62, end: 0, amount: 0, inMask: 1<<63 | 3, outMask: 1<<63 | 3},
+		{start: 63, end: 1, amount: 0, inMask: 3<<62 | 1, outMask: 3<<62 | 1},
+		{start: 62, end: 1, amount: 0, inMask: 3<<62 | 3, outMask: 3<<62 | 3},
+
+		// rotation
+		{start: 32, end: 63, amount: 32, inMask: 0xffffffff00000000, outMask: 0x00000000ffffffff},
+		{start: 48, end: 15, amount: 16, inMask: 0xffffffff00000000, outMask: 0xffff00000000ffff},
+		{start: 0, end: 7, amount: -8 & 63, inMask: 0xff, outMask: 0xff << 56},
+	}
+	for i, test := range tests {
+		r := NewRotateParams(test.start, test.end, test.amount)
+		if m := r.OutMask(); m != test.outMask {
+			t.Errorf("out mask %v: want %#x, got %#x", i, test.outMask, m)
+		}
+		if m := r.InMask(); m != test.inMask {
+			t.Errorf("in mask %v: want %#x, got %#x", i, test.inMask, m)
+		}
+	}
+}
+
+func TestRotateParamsMerge(t *testing.T) {
+	tests := []struct {
+		// inputs
+		src  RotateParams
+		mask uint64
+
+		// results
+		in  *RotateParams
+		out *RotateParams
+	}{
+		{
+			src:  RotateParams{Start: 48, End: 15, Amount: 16},
+			mask: 0xffffffffffffffff,
+			in:   &RotateParams{Start: 48, End: 15, Amount: 16},
+			out:  &RotateParams{Start: 48, End: 15, Amount: 16},
+		},
+		{
+			src:  RotateParams{Start: 16, End: 47, Amount: 0},
+			mask: 0x00000000ffffffff,
+			in:   &RotateParams{Start: 32, End: 47, Amount: 0},
+			out:  &RotateParams{Start: 32, End: 47, Amount: 0},
+		},
+		{
+			src:  RotateParams{Start: 16, End: 47, Amount: 0},
+			mask: 0xffff00000000ffff,
+			in:   nil,
+			out:  nil,
+		},
+		{
+			src:  RotateParams{Start: 0, End: 63, Amount: 0},
+			mask: 0xf7f0000000000000,
+			in:   nil,
+			out:  nil,
+		},
+		{
+			src:  RotateParams{Start: 0, End: 63, Amount: 1},
+			mask: 0x000000000000ff00,
+			in:   &RotateParams{Start: 47, End: 54, Amount: 1},
+			out:  &RotateParams{Start: 48, End: 55, Amount: 1},
+		},
+		{
+			src:  RotateParams{Start: 32, End: 63, Amount: 32},
+			mask: 0xffff00000000ffff,
+			in:   &RotateParams{Start: 32, End: 47, Amount: 32},
+			out:  &RotateParams{Start: 48, End: 63, Amount: 32},
+		},
+		{
+			src:  RotateParams{Start: 0, End: 31, Amount: 32},
+			mask: 0x8000000000000000,
+			in:   nil,
+			out:  &RotateParams{Start: 0, End: 0, Amount: 32},
+		},
+		{
+			src:  RotateParams{Start: 0, End: 31, Amount: 32},
+			mask: 0x0000000080000000,
+			in:   
&RotateParams{Start: 0, End: 0, Amount: 32}, + out: nil, + }, + } + + eq := func(x, y *RotateParams) bool { + if x == nil && y == nil { + return true + } + if x == nil || y == nil { + return false + } + return *x == *y + } + + for _, test := range tests { + if r := test.src.InMerge(test.mask); !eq(r, test.in) { + t.Errorf("%v merged with %#x (input): want %v, got %v", test.src, test.mask, test.in, r) + } + if r := test.src.OutMerge(test.mask); !eq(r, test.out) { + t.Errorf("%v merged with %#x (output): want %v, got %v", test.src, test.mask, test.out, r) + } + } +} diff --git a/test/codegen/bitfield.go b/test/codegen/bitfield.go index 08788f1447..7abc1c2783 100644 --- a/test/codegen/bitfield.go +++ b/test/codegen/bitfield.go @@ -127,11 +127,13 @@ func sbfx6(x int32) int32 { // ubfiz func ubfiz1(x uint64) uint64 { // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]12",-"LSL",-"AND" + // s390x:"RISBGZ\t[$]49, [$]60, [$]3,",-"SLD",-"AND" return (x & 0xfff) << 3 } func ubfiz2(x uint64) uint64 { // arm64:"UBFIZ\t[$]4, R[0-9]+, [$]12",-"LSL",-"AND" + // s390x:"RISBGZ\t[$]48, [$]59, [$]4,",-"SLD",-"AND" return (x << 4) & 0xfff0 } @@ -149,6 +151,7 @@ func ubfiz5(x uint8) uint64 { func ubfiz6(x uint64) uint64 { // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]3, [$]62, [$]1, ",-"SLD",-"SRD" return (x << 4) >> 3 } @@ -159,6 +162,7 @@ func ubfiz7(x uint32) uint32 { func ubfiz8(x uint64) uint64 { // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]20",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]43, [$]62, [$]1, ",-"SLD",-"SRD",-"AND" return ((x & 0xfffff) << 4) >> 3 } @@ -169,17 +173,20 @@ func ubfiz9(x uint64) uint64 { func ubfiz10(x uint64) uint64 { // arm64:"UBFIZ\t[$]7, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]45, [$]56, [$]7, ",-"SLD",-"SRD",-"AND" return ((x << 5) & (0xfff << 5)) << 2 } // ubfx func ubfx1(x uint64) uint64 { // arm64:"UBFX\t[$]25, R[0-9]+, [$]10",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]54, [$]63, [$]39, ",-"SRD",-"AND" return (x >> 25) & 1023 } func ubfx2(x uint64) uint64 { // arm64:"UBFX\t[$]4, R[0-9]+, [$]8",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]56, [$]63, [$]60, ",-"SRD",-"AND" return (x & 0x0ff0) >> 4 } @@ -196,30 +203,37 @@ func ubfx5(x uint8) uint64 { } func ubfx6(x uint64) uint64 { - return (x << 1) >> 2 // arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR" + // arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]2, [$]63, [$]63,",-"SLD",-"SRD" + return (x << 1) >> 2 } func ubfx7(x uint32) uint32 { - return (x << 1) >> 2 // arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR" + // arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR" + return (x << 1) >> 2 } func ubfx8(x uint64) uint64 { // arm64:"UBFX\t[$]1, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]52, [$]63, [$]63,",-"SLD",-"SRD",-"AND" return ((x << 1) >> 2) & 0xfff } func ubfx9(x uint64) uint64 { // arm64:"UBFX\t[$]4, R[0-9]+, [$]11",-"LSL",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]53, [$]63, [$]60, ",-"SLD",-"SRD",-"AND" return ((x >> 3) & 0xfff) >> 1 } func ubfx10(x uint64) uint64 { // arm64:"UBFX\t[$]5, R[0-9]+, [$]56",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]8, [$]63, [$]59, ",-"SLD",-"SRD" return ((x >> 2) << 5) >> 8 } func ubfx11(x uint64) uint64 { // arm64:"UBFX\t[$]1, R[0-9]+, [$]19",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]45, [$]63, [$]63, ",-"SLD",-"SRD",-"AND" return ((x & 0xfffff) << 3) >> 4 } diff --git a/test/codegen/bits.go b/test/codegen/bits.go index 398dd84e9e..56e0f3474e 100644 --- a/test/codegen/bits.go +++ b/test/codegen/bits.go @@ -340,3 +340,15 @@ func bitSetTest(x int) bool { // 
amd64:"CMPQ\tAX, [$]9" return x&9 == 9 } + +// mask contiguous one bits +func cont1Mask64U(x uint64) uint64 { + // s390x:"RISBGZ\t[$]16, [$]47, [$]0," + return x&0x0000ffffffff0000 +} + +// mask contiguous zero bits +func cont0Mask64U(x uint64) uint64 { + // s390x:"RISBGZ\t[$]48, [$]15, [$]0," + return x&0xffff00000000ffff +} diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index 4c35f26997..fff6639546 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -213,7 +213,7 @@ func RotateLeft64(n uint64) uint64 { // arm64:"ROR" // ppc64:"ROTL" // ppc64le:"ROTL" - // s390x:"RLLG" + // s390x:"RISBGZ\t[$]0, [$]63, [$]37, " // wasm:"I64Rotl" return bits.RotateLeft64(n, 37) } diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go index 0c8b030970..e0bcd0abbc 100644 --- a/test/codegen/rotate.go +++ b/test/codegen/rotate.go @@ -17,21 +17,21 @@ func rot64(x uint64) uint64 { // amd64:"ROLQ\t[$]7" // arm64:"ROR\t[$]57" - // s390x:"RLLG\t[$]7" + // s390x:"RISBGZ\t[$]0, [$]63, [$]7, " // ppc64:"ROTL\t[$]7" // ppc64le:"ROTL\t[$]7" a += x<<7 | x>>57 // amd64:"ROLQ\t[$]8" // arm64:"ROR\t[$]56" - // s390x:"RLLG\t[$]8" + // s390x:"RISBGZ\t[$]0, [$]63, [$]8, " // ppc64:"ROTL\t[$]8" // ppc64le:"ROTL\t[$]8" a += x<<8 + x>>56 // amd64:"ROLQ\t[$]9" // arm64:"ROR\t[$]55" - // s390x:"RLLG\t[$]9" + // s390x:"RISBGZ\t[$]0, [$]63, [$]9, " // ppc64:"ROTL\t[$]9" // ppc64le:"ROTL\t[$]9" a += x<<9 ^ x>>55 diff --git a/test/codegen/shift.go b/test/codegen/shift.go index a45f27c9cf..d19a1984c1 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -11,84 +11,84 @@ package codegen // ------------------ // func lshMask64x64(v int64, s uint64) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v << (s & 63) } func rshMask64Ux64(v uint64, s uint64) uint64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v >> (s & 63) } func rshMask64x64(v int64, s uint64) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v >> (s & 63) } func lshMask32x64(v int32, s uint64) int32 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ISEL",-"ORN" // ppc64:"ISEL",-"ORN" return v << (s & 63) } func rshMask32Ux64(v uint32, s uint64) uint32 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ISEL",-"ORN" // ppc64:"ISEL",-"ORN" return v >> (s & 63) } func rshMask32x64(v int32, s uint64) int32 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ISEL",-"ORN" // ppc64:"ISEL",-"ORN" return v >> (s & 63) } func lshMask64x32(v int64, s uint32) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN" // ppc64:"ANDCC",-"ORN" return v << (s & 63) } func rshMask64Ux32(v uint64, s uint32) uint64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN" // ppc64:"ANDCC",-"ORN" return v >> (s & 63) } func rshMask64x32(v int64, s uint32) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v >> (s & 63) } func lshMask64x32Ext(v int64, s int32) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // 
ppc64:"ANDCC",-"ORN",-"ISEL" return v << uint(s&63) } func rshMask64Ux32Ext(v uint64, s int32) uint64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v >> uint(s&63) } func rshMask64x32Ext(v int64, s int32) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v >> uint(s&63) @@ -128,7 +128,8 @@ func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) { func rshGuarded64(v int64, s uint) int64 { if s < 64 { - // s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + // wasm:-"Select",-".*LtU" return v >> s } panic("shift too large") @@ -136,7 +137,8 @@ func rshGuarded64(v int64, s uint) int64 { func rshGuarded64U(v uint64, s uint) uint64 { if s < 64 { - // s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + // wasm:-"Select",-".*LtU" return v >> s } panic("shift too large") @@ -144,7 +146,8 @@ func rshGuarded64U(v uint64, s uint) uint64 { func lshGuarded64(v int64, s uint) int64 { if s < 64 { - // s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + // wasm:-"Select",-".*LtU" return v << s } panic("shift too large") -- cgit v1.3