diff options
| author | Lynn Boger <laboger@linux.vnet.ibm.com> | 2018-02-19 11:10:31 -0500 |
|---|---|---|
| committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2018-03-08 14:17:34 +0000 |
| commit | 5b14c7b324dc645dd7c90b22ddd86affac0637c2 (patch) | |
| tree | c66617c712efaa1539d12ed1af79a7d89c558694 /src/cmd/internal/obj/ppc64 | |
| parent | 0add9a4dcfb4cbc0d99cd168752bd1bd641ce8e2 (diff) | |
| download | go-5b14c7b324dc645dd7c90b22ddd86affac0637c2.tar.xz | |
cmd/asm, cmd/internal/obj/ppc64: avoid unnecessary load zeros
When instructions add, and, or, xor, and movd have
constant operands in some cases more instructions are
generated than necessary by the assembler.
This adds more opcode/operand combinations to the optab
and improves the code generation for the cases where the
size and sign of the constant allows the use of 1
instructions instead of 2.
Example of previous code:
oris r3, r0, 0
ori r3, r3, 65533
now:
ori r3, r0, 65533
This does not significantly reduce the overall binary size
because the improvement depends on the constant value.
Some procedures show a 1-2% reduction in size. This improvement
could also be significant in cases where the extra instructions
occur in a critical loop.
Testcase ppc64enc.s was added to cmd/asm/internal/asm/testdata
with the variations affected by this change.
Updates #23845
Change-Id: I7fdf2320c95815d99f2755ba77d0c6921cd7fad7
Reviewed-on: https://go-review.googlesource.com/95135
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src/cmd/internal/obj/ppc64')
| -rw-r--r-- | src/cmd/internal/obj/ppc64/asm9.go | 104 |
1 files changed, 79 insertions, 25 deletions
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 6cbf91a11d..11c43ec2e5 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -71,6 +71,14 @@ type Optab struct { param int16 } +// This optab contains a list of opcodes with the operand +// combinations that are implemented. Not all opcodes are in this +// table, but are added later in buildop by calling opset for those +// opcodes which allow the same operand combinations as an opcode +// already in the table. +// +// The type field in the Optabl identifies the case in asmout where +// the instruction word is assembled. var optab = []Optab{ {obj.ATEXT, C_LEXT, C_NONE, C_NONE, C_TEXTSIZE, 0, 0, 0}, {obj.ATEXT, C_LEXT, C_NONE, C_LCON, C_TEXTSIZE, 0, 0, 0}, @@ -84,14 +92,18 @@ var optab = []Optab{ {AMOVWZ, C_REG, C_NONE, C_NONE, C_REG, 13, 4, 0}, {AADD, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0}, {AADD, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0}, + {AADD, C_SCON, C_REG, C_NONE, C_REG, 4, 4, 0}, + {AADD, C_SCON, C_NONE, C_NONE, C_REG, 4, 4, 0}, {AADD, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0}, {AADD, C_ADDCON, C_NONE, C_NONE, C_REG, 4, 4, 0}, {AADD, C_UCON, C_REG, C_NONE, C_REG, 20, 4, 0}, {AADD, C_UCON, C_NONE, C_NONE, C_REG, 20, 4, 0}, - {AADDIS, C_ADDCON, C_REG, C_NONE, C_REG, 20, 4, 0}, - {AADDIS, C_ADDCON, C_NONE, C_NONE, C_REG, 20, 4, 0}, + {AADD, C_ANDCON, C_REG, C_NONE, C_REG, 22, 8, 0}, + {AADD, C_ANDCON, C_NONE, C_NONE, C_REG, 22, 8, 0}, {AADD, C_LCON, C_REG, C_NONE, C_REG, 22, 12, 0}, {AADD, C_LCON, C_NONE, C_NONE, C_REG, 22, 12, 0}, + {AADDIS, C_ADDCON, C_REG, C_NONE, C_REG, 20, 4, 0}, + {AADDIS, C_ADDCON, C_NONE, C_NONE, C_REG, 20, 4, 0}, {AADDC, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0}, {AADDC, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0}, {AADDC, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0}, @@ -106,10 +118,12 @@ var optab = []Optab{ {AANDCC, C_ANDCON, C_REG, C_NONE, C_REG, 58, 4, 0}, {AANDCC, C_UCON, C_NONE, C_NONE, C_REG, 59, 4, 0}, {AANDCC, C_UCON, C_REG, C_NONE, C_REG, 59, 4, 0}, - {AANDISCC, C_ANDCON, C_NONE, C_NONE, C_REG, 59, 4, 0}, - {AANDISCC, C_ANDCON, C_REG, C_NONE, C_REG, 59, 4, 0}, + {AANDCC, C_ADDCON, C_NONE, C_NONE, C_REG, 23, 8, 0}, + {AANDCC, C_ADDCON, C_REG, C_NONE, C_REG, 23, 8, 0}, {AANDCC, C_LCON, C_NONE, C_NONE, C_REG, 23, 12, 0}, {AANDCC, C_LCON, C_REG, C_NONE, C_REG, 23, 12, 0}, + {AANDISCC, C_ANDCON, C_NONE, C_NONE, C_REG, 59, 4, 0}, + {AANDISCC, C_ANDCON, C_REG, C_NONE, C_REG, 59, 4, 0}, {AMULLW, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0}, {AMULLW, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0}, {AMULLW, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0}, @@ -128,10 +142,12 @@ var optab = []Optab{ {AOR, C_ANDCON, C_REG, C_NONE, C_REG, 58, 4, 0}, {AOR, C_UCON, C_NONE, C_NONE, C_REG, 59, 4, 0}, {AOR, C_UCON, C_REG, C_NONE, C_REG, 59, 4, 0}, - {AORIS, C_ANDCON, C_NONE, C_NONE, C_REG, 59, 4, 0}, - {AORIS, C_ANDCON, C_REG, C_NONE, C_REG, 59, 4, 0}, + {AOR, C_ADDCON, C_NONE, C_NONE, C_REG, 23, 8, 0}, + {AOR, C_ADDCON, C_REG, C_NONE, C_REG, 23, 8, 0}, {AOR, C_LCON, C_NONE, C_NONE, C_REG, 23, 12, 0}, {AOR, C_LCON, C_REG, C_NONE, C_REG, 23, 12, 0}, + {AORIS, C_ANDCON, C_NONE, C_NONE, C_REG, 59, 4, 0}, + {AORIS, C_ANDCON, C_REG, C_NONE, C_REG, 59, 4, 0}, {ADIVW, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0}, /* op r1[,r2],r3 */ {ADIVW, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0}, {ASUB, C_REG, C_REG, C_NONE, C_REG, 10, 4, 0}, /* op r2[,r1],r3 */ @@ -277,16 +293,19 @@ var optab = []Optab{ {AMOVD, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB}, {AMOVD, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP}, {AMOVD, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO}, + {AMOVD, C_ANDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO}, {AMOVW, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB}, /* TO DO: check */ {AMOVW, C_SACON, C_NONE, C_NONE, C_REG, 3, 4, REGSP}, {AMOVW, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB}, {AMOVW, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP}, {AMOVW, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO}, + {AMOVW, C_ANDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO}, {AMOVWZ, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB}, /* TO DO: check */ {AMOVWZ, C_SACON, C_NONE, C_NONE, C_REG, 3, 4, REGSP}, {AMOVWZ, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB}, {AMOVWZ, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP}, {AMOVWZ, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO}, + {AMOVWZ, C_ANDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO}, /* load unsigned/long constants (TO DO: check) */ {AMOVD, C_UCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO}, @@ -1048,13 +1067,25 @@ func (x ocmp) Swap(i, j int) { x[i], x[j] = x[j], x[i] } +// Used when sorting the optab. Sorting is +// done in a way so that the best choice of +// opcode/operand combination is considered first. func (x ocmp) Less(i, j int) bool { p1 := &x[i] p2 := &x[j] n := int(p1.as) - int(p2.as) + // same opcode + if n != 0 { + return n < 0 + } + // Consider those that generate fewer + // instructions first. + n = int(p1.size) - int(p2.size) if n != 0 { return n < 0 } + // operand order should match + // better choices first n = int(p1.a1) - int(p2.a1) if n != 0 { return n < 0 @@ -1073,10 +1104,15 @@ func (x ocmp) Less(i, j int) bool { } return false } + +// Add an entry to the opcode table for +// a new opcode b0 with the same operand combinations +// as opcode a. func opset(a, b0 obj.As) { oprange[a&obj.AMask] = oprange[b0] } +// Build the opcode table func buildop(ctxt *obj.Link) { if oprange[AANDN&obj.AMask] != nil { // Already initialized; stop now. @@ -2256,7 +2292,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg)) - case 3: /* mov $soreg/addcon/ucon, r ==> addis/addi $i,reg',r */ + case 3: /* mov $soreg/addcon/andcon/ucon, r ==> addis/oris/addi/ori $i,reg',r */ d := c.vregoff(&p.From) v := int32(d) @@ -2272,6 +2308,8 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { if d&0xffff != 0 { log.Fatalf("invalid handling of %v", p) } + // For UCON operands the value is right shifted 16, using ADDIS if the + // value should be signed, ORIS if unsigned. v >>= 16 if r == REGZERO && isuint32(uint64(d)) { o1 = LOP_IRR(OP_ORIS, uint32(p.To.Reg), REGZERO, uint32(v)) @@ -2279,8 +2317,16 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { } a = OP_ADDIS - } else { - if int64(int16(d)) != d { + } else if int64(int16(d)) != d { + // Operand is 16 bit value with sign bit set + if o.a1 == C_ANDCON { + // Needs unsigned 16 bit so use ORI + if r == 0 || r == REGZERO { + o1 = LOP_IRR(uint32(OP_ORI), uint32(p.To.Reg), uint32(0), uint32(v)) + break + } + // With ADDCON, needs signed 16 bit value, fall through to use ADDI + } else if o.a1 != C_ADDCON { log.Fatalf("invalid handling of %v", p) } } @@ -2632,8 +2678,6 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { o1, o2 = c.symbolAccess(p.From.Sym, d, p.To.Reg, OP_ADDI) } - //if(dlm) reloc(&p->from, p->pc, 0); - case 20: /* add $ucon,,r | addis $addcon,r,r */ v := c.regoff(&p.From) @@ -2650,43 +2694,53 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = AOP_IRR(c.opirr(AADDIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16) } - case 22: /* add $lcon,r1,r2 ==> cau+or+add */ /* could do add/sub more efficiently */ + case 22: /* add $lcon/$andcon,r1,r2 ==> oris+ori+add/ori+add */ if p.To.Reg == REGTMP || p.Reg == REGTMP { c.ctxt.Diag("can't synthesize large constant\n%v", p) } d := c.vregoff(&p.From) - o1 = loadu32(REGTMP, d) - o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d))) r := int(p.Reg) if r == 0 { r = int(p.To.Reg) } - o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) if p.From.Sym != nil { c.ctxt.Diag("%v is not supported", p) } + // If operand is ANDCON, generate 2 instructions using + // ORI for unsigned value; with LCON 3 instructions. + if o.size == 8 { + o1 = LOP_IRR(OP_ORI, REGTMP, REGZERO, uint32(int32(d))) + o2 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) + } else { + o1 = loadu32(REGTMP, d) + o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d))) + o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) + } - //if(dlm) reloc(&p->from, p->pc, 0); - - case 23: /* and $lcon,r1,r2 ==> cau+or+and */ /* masks could be done using rlnm etc. */ + case 23: /* and $lcon/$addcon,r1,r2 ==> oris+ori+and/addi+and */ if p.To.Reg == REGTMP || p.Reg == REGTMP { c.ctxt.Diag("can't synthesize large constant\n%v", p) } d := c.vregoff(&p.From) - o1 = loadu32(REGTMP, d) - o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d))) r := int(p.Reg) if r == 0 { r = int(p.To.Reg) } - o3 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) + + // With ADDCON operand, generate 2 instructions using ADDI for signed value, + // with LCON operand generate 3 instructions. + if o.size == 8 { + o1 = LOP_IRR(OP_ADDI, REGZERO, REGTMP, uint32(int32(d))) + o2 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) + } else { + o1 = loadu32(REGTMP, d) + o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d))) + o3 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) + } if p.From.Sym != nil { c.ctxt.Diag("%v is not supported", p) } - //if(dlm) reloc(&p->from, p->pc, 0); - - /*24*/ case 25: /* sld[.] $sh,rS,rA -> rldicr[.] $sh,rS,mask(0,63-sh),rA; srd[.] -> rldicl */ v := c.regoff(&p.From) @@ -3090,7 +3144,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case AXOR: o1 = LOP_IRR(c.opirr(AXORIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16) case AANDCC: - o1 = LOP_IRR(c.opirr(AANDCC), uint32(p.To.Reg), uint32(r), uint32(v)>>16) + o1 = LOP_IRR(c.opirr(AANDISCC), uint32(p.To.Reg), uint32(r), uint32(v)>>16) default: o1 = LOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v)) } |
