diff options
| author | Lynn Boger <laboger@linux.vnet.ibm.com> | 2017-04-18 17:05:31 -0400 |
|---|---|---|
| committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2017-04-20 18:05:22 +0000 |
| commit | 9248ff46a82aec26164a775f3eba43e3fcfb5651 (patch) | |
| tree | 517b37839d64a13cc077910b9c3e14d74ae178a8 /src/cmd/internal/obj/ppc64 | |
| parent | 865b50c982a2c8b2a790772c6777a53c3f268bab (diff) | |
| download | go-9248ff46a82aec26164a775f3eba43e3fcfb5651.tar.xz | |
cmd/compile: add rotates to PPC64.rules
This updates PPC64.rules to include rules that generate rotates
for ADD, OR, and XOR operations that combine two opposite shifts
whose shift amounts sum to 32 or 64.
To support this change, the opcodes ROTL and ROTLW were added; they are
used like the rotldi and rotlwi extended mnemonics (or like rotld and rotlw when the rotate count is in a register).
This provides the following improvement in the sha3 benchmarks (columns: benchmark, old, new, speedup):
BenchmarkPermutationFunction-8 302.83 376.40 1.24x
BenchmarkSha3_512_MTU-8 98.64 121.92 1.24x
BenchmarkSha3_384_MTU-8 136.80 168.30 1.23x
BenchmarkSha3_256_MTU-8 169.21 211.29 1.25x
BenchmarkSha3_224_MTU-8 179.76 221.19 1.23x
BenchmarkShake128_MTU-8 212.87 263.23 1.24x
BenchmarkShake256_MTU-8 196.62 245.60 1.25x
BenchmarkShake256_16x-8 163.57 194.37 1.19x
BenchmarkShake256_1MiB-8 199.02 248.74 1.25x
BenchmarkSha3_512_1MiB-8 106.55 133.13 1.25x
Fixes #20030
Change-Id: I484c56f48395d32f53ff3ecb3ac6cb8191cfee44
Reviewed-on: https://go-review.googlesource.com/40992
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/cmd/internal/obj/ppc64')
| -rw-r--r-- | src/cmd/internal/obj/ppc64/a.out.go | 2 |
| -rw-r--r-- | src/cmd/internal/obj/ppc64/anames.go | 2 |
| -rw-r--r-- | src/cmd/internal/obj/ppc64/asm9.go | 50 |
3 files changed, 33 insertions, 21 deletions
diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index 15e143d12c..90a204745b 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -680,6 +680,8 @@ const ( ARLDCLCC ARLDICL ARLDICLCC + AROTL + AROTLW ASLBIA ASLBIE ASLBMFEE diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go index 01f4a7d41b..5ca29454a6 100644 --- a/src/cmd/internal/obj/ppc64/anames.go +++ b/src/cmd/internal/obj/ppc64/anames.go @@ -301,6 +301,8 @@ var Anames = []string{ "RLDCLCC", "RLDICL", "RLDICLCC", + "ROTL", + "ROTLW", "SLBIA", "SLBIE", "SLBMFEE", diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 033203bfd4..c835ef7f17 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -1655,11 +1655,13 @@ func buildop(ctxt *obj.Link) { opset(ASLWCC, r0) opset(ASRW, r0) opset(ASRWCC, r0) + opset(AROTLW, r0) case ASLD: opset(ASLDCC, r0) opset(ASRD, r0) opset(ASRDCC, r0) + opset(AROTL, r0) case ASRAW: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */ opset(ASRAWCC, r0) @@ -1971,10 +1973,12 @@ const ( OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0 OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0 OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0 + OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0 OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0 OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0 OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0 OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0 + OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0 ) func oclass(a *obj.Addr) int { @@ -2258,7 +2262,15 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { if r == 0 { r = int(p.To.Reg) } - o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg)) + // AROTL and AROTLW are extended mnemonics, which map to RLDCL and RLWNM. 
+ switch p.As { + case AROTL: + o1 = AOP_RLDIC(OP_RLDCL, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), uint32(0)) + case AROTLW: + o1 = OP_RLW(OP_RLWNM, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), 0, 31) + default: + o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg)) + } case 7: /* mov r, soreg ==> stw o(r) */ r := int(p.To.Reg) @@ -2636,32 +2648,28 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { r = int(p.To.Reg) } var a int + op := uint32(0) switch p.As { case ASLD, ASLDCC: a = int(63 - v) - o1 = OP_RLDICR + op = OP_RLDICR case ASRD, ASRDCC: a = int(v) v = 64 - v - o1 = OP_RLDICL - + op = OP_RLDICL + case AROTL: + a = int(0) + op = OP_RLDICL default: c.ctxt.Diag("unexpected op in sldi case\n%v", p) a = 0 o1 = 0 } - o1 = AOP_RRR(o1, uint32(r), uint32(p.To.Reg), (uint32(v) & 0x1F)) - o1 |= (uint32(a) & 31) << 6 - if v&0x20 != 0 { - o1 |= 1 << 1 - } - if a&0x20 != 0 { - o1 |= 1 << 5 /* mb[5] is top bit */ - } + o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a)) if p.As == ASLDCC || p.As == ASRDCC { - o1 |= 1 /* Rc */ + o1 |= 1 // Set the condition code bit } case 26: /* mov $lsext/auto/oreg,,r2 ==> addis+addi */ @@ -2978,18 +2986,18 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { v = 32 } var mask [2]uint8 - if p.As == ASRW || p.As == ASRWCC { /* shift right */ - mask[0] = uint8(v) - mask[1] = 31 + switch p.As { + case AROTLW: + mask[0], mask[1] = 0, 31 + case ASRW, ASRWCC: + mask[0], mask[1] = uint8(v), 31 v = 32 - v - } else { - mask[0] = 0 - mask[1] = uint8(31 - v) + default: + mask[0], mask[1] = 0, uint8(31-v) } - o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(r), uint32(v), uint32(mask[0]), uint32(mask[1])) if p.As == ASLWCC || p.As == ASRWCC { - o1 |= 1 /* Rc */ + o1 |= 1 // set the condition code } case 58: /* logical $andcon,[s],a */ |
