aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/internal/obj/ppc64
diff options
context:
space:
mode:
author: Lynn Boger <laboger@linux.vnet.ibm.com> 2017-04-18 17:05:31 -0400
committer: Lynn Boger <laboger@linux.vnet.ibm.com> 2017-04-20 18:05:22 +0000
commit: 9248ff46a82aec26164a775f3eba43e3fcfb5651 (patch)
tree: 517b37839d64a13cc077910b9c3e14d74ae178a8 /src/cmd/internal/obj/ppc64
parent: 865b50c982a2c8b2a790772c6777a53c3f268bab (diff)
download: go-9248ff46a82aec26164a775f3eba43e3fcfb5651.tar.xz
cmd/compile: add rotates to PPC64.rules
This updates PPC64.rules to include rules to generate rotates for ADD, OR, XOR operators that combine two opposite shifts that sum to 32 or 64.

To support this change, opcodes for ROTL and ROTLW were added to be used like the rotldi and rotlwi extended mnemonics.

This provides the following improvement in sha3:

BenchmarkPermutationFunction-8 302.83 376.40 1.24x
BenchmarkSha3_512_MTU-8 98.64 121.92 1.24x
BenchmarkSha3_384_MTU-8 136.80 168.30 1.23x
BenchmarkSha3_256_MTU-8 169.21 211.29 1.25x
BenchmarkSha3_224_MTU-8 179.76 221.19 1.23x
BenchmarkShake128_MTU-8 212.87 263.23 1.24x
BenchmarkShake256_MTU-8 196.62 245.60 1.25x
BenchmarkShake256_16x-8 163.57 194.37 1.19x
BenchmarkShake256_1MiB-8 199.02 248.74 1.25x
BenchmarkSha3_512_1MiB-8 106.55 133.13 1.25x

Fixes #20030

Change-Id: I484c56f48395d32f53ff3ecb3ac6cb8191cfee44
Reviewed-on: https://go-review.googlesource.com/40992
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/cmd/internal/obj/ppc64')
-rw-r--r-- src/cmd/internal/obj/ppc64/a.out.go 2
-rw-r--r-- src/cmd/internal/obj/ppc64/anames.go 2
-rw-r--r-- src/cmd/internal/obj/ppc64/asm9.go 50
3 files changed, 33 insertions, 21 deletions
diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go
index 15e143d12c..90a204745b 100644
--- a/src/cmd/internal/obj/ppc64/a.out.go
+++ b/src/cmd/internal/obj/ppc64/a.out.go
@@ -680,6 +680,8 @@ const (
ARLDCLCC
ARLDICL
ARLDICLCC
+ AROTL
+ AROTLW
ASLBIA
ASLBIE
ASLBMFEE
diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go
index 01f4a7d41b..5ca29454a6 100644
--- a/src/cmd/internal/obj/ppc64/anames.go
+++ b/src/cmd/internal/obj/ppc64/anames.go
@@ -301,6 +301,8 @@ var Anames = []string{
"RLDCLCC",
"RLDICL",
"RLDICLCC",
+ "ROTL",
+ "ROTLW",
"SLBIA",
"SLBIE",
"SLBMFEE",
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go
index 033203bfd4..c835ef7f17 100644
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -1655,11 +1655,13 @@ func buildop(ctxt *obj.Link) {
opset(ASLWCC, r0)
opset(ASRW, r0)
opset(ASRWCC, r0)
+ opset(AROTLW, r0)
case ASLD:
opset(ASLDCC, r0)
opset(ASRD, r0)
opset(ASRDCC, r0)
+ opset(AROTL, r0)
case ASRAW: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */
opset(ASRAWCC, r0)
@@ -1971,10 +1973,12 @@ const (
OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0
OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0
OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0
+ OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0
OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0
OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0
OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0
OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0
+ OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0
)
func oclass(a *obj.Addr) int {
@@ -2258,7 +2262,15 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
if r == 0 {
r = int(p.To.Reg)
}
- o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
+ // AROTL and AROTLW are extended mnemonics, which map to RLDCL and RLWNM.
+ switch p.As {
+ case AROTL:
+ o1 = AOP_RLDIC(OP_RLDCL, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), uint32(0))
+ case AROTLW:
+ o1 = OP_RLW(OP_RLWNM, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), 0, 31)
+ default:
+ o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
+ }
case 7: /* mov r, soreg ==> stw o(r) */
r := int(p.To.Reg)
@@ -2636,32 +2648,28 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
r = int(p.To.Reg)
}
var a int
+ op := uint32(0)
switch p.As {
case ASLD, ASLDCC:
a = int(63 - v)
- o1 = OP_RLDICR
+ op = OP_RLDICR
case ASRD, ASRDCC:
a = int(v)
v = 64 - v
- o1 = OP_RLDICL
-
+ op = OP_RLDICL
+ case AROTL:
+ a = int(0)
+ op = OP_RLDICL
default:
c.ctxt.Diag("unexpected op in sldi case\n%v", p)
a = 0
o1 = 0
}
- o1 = AOP_RRR(o1, uint32(r), uint32(p.To.Reg), (uint32(v) & 0x1F))
- o1 |= (uint32(a) & 31) << 6
- if v&0x20 != 0 {
- o1 |= 1 << 1
- }
- if a&0x20 != 0 {
- o1 |= 1 << 5 /* mb[5] is top bit */
- }
+ o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a))
if p.As == ASLDCC || p.As == ASRDCC {
- o1 |= 1 /* Rc */
+ o1 |= 1 // Set the condition code bit
}
case 26: /* mov $lsext/auto/oreg,,r2 ==> addis+addi */
@@ -2978,18 +2986,18 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
v = 32
}
var mask [2]uint8
- if p.As == ASRW || p.As == ASRWCC { /* shift right */
- mask[0] = uint8(v)
- mask[1] = 31
+ switch p.As {
+ case AROTLW:
+ mask[0], mask[1] = 0, 31
+ case ASRW, ASRWCC:
+ mask[0], mask[1] = uint8(v), 31
v = 32 - v
- } else {
- mask[0] = 0
- mask[1] = uint8(31 - v)
+ default:
+ mask[0], mask[1] = 0, uint8(31-v)
}
-
o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(r), uint32(v), uint32(mask[0]), uint32(mask[1]))
if p.As == ASLWCC || p.As == ASRWCC {
- o1 |= 1 /* Rc */
+ o1 |= 1 // set the condition code
}
case 58: /* logical $andcon,[s],a */