aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKeith Randall <khr@golang.org>2017-04-24 13:48:35 -0700
committerKeith Randall <khr@golang.org>2017-04-27 16:45:01 +0000
commit14f3ca56eda1120cb9f25f05f7fbf705d2bb0dc3 (patch)
treed7740115a87500cb3c748e55353c4122ea8ff11d /src
parentc120e449fbc618f9510387d718de0cef5f73af3a (diff)
downloadgo-14f3ca56eda1120cb9f25f05f7fbf705d2bb0dc3.tar.xz
cmd/internal/obj: ARM, use immediates instead of constant pool entries
When a constant doesn't fit in a single instruction, use two paired instructions instead of the constant pool. For example ADD $0xaa00bb, R0, R1 Used to rewrite to: MOV ?(IP), R11 ADD R11, R0, R1 Instead, do: ADD $0xaa0000, R0, R1 ADD $0xbb, R1, R1 Same number of instructions. Good: 4 less bytes (no constant pool entry) One less load. Bad: Critical path is one instruction longer. It's probably worth it to avoid the loads, they are expensive. Dave Cheney got us some performance numbers: https://perf.golang.org/search?q=upload:20170426.1 TL;DR mean 1.37% improvement. Change-Id: Ib206836161fdc94a3962db6f9caa635c87d57cf1 Reviewed-on: https://go-review.googlesource.com/41612 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
Diffstat (limited to 'src')
-rw-r--r--src/cmd/internal/obj/arm/a.out.go7
-rw-r--r--src/cmd/internal/obj/arm/anames5.go1
-rw-r--r--src/cmd/internal/obj/arm/asm5.go75
3 files changed, 78 insertions, 5 deletions
diff --git a/src/cmd/internal/obj/arm/a.out.go b/src/cmd/internal/obj/arm/a.out.go
index 35875d0b53..ea153a30da 100644
--- a/src/cmd/internal/obj/arm/a.out.go
+++ b/src/cmd/internal/obj/arm/a.out.go
@@ -121,9 +121,10 @@ const (
C_PSR
C_FCR
- C_RCON /* 0xff rotated */
- C_NCON /* ~RCON */
- C_SCON /* 0xffff */
+ C_RCON /* 0xff rotated */
+ C_NCON /* ~RCON */
+ C_RCON2 /* OR of two disjoint C_RCON constants */
+ C_SCON /* 0xffff */
C_LCON
C_LCONADDR
C_ZFCON
diff --git a/src/cmd/internal/obj/arm/anames5.go b/src/cmd/internal/obj/arm/anames5.go
index 7fdd9623bd..05892def04 100644
--- a/src/cmd/internal/obj/arm/anames5.go
+++ b/src/cmd/internal/obj/arm/anames5.go
@@ -16,6 +16,7 @@ var cnames5 = []string{
"FCR",
"RCON",
"NCON",
+ "RCON2",
"SCON",
"LCON",
"LCONADDR",
diff --git a/src/cmd/internal/obj/arm/asm5.go b/src/cmd/internal/obj/arm/asm5.go
index e69948f15a..1770ab6129 100644
--- a/src/cmd/internal/obj/arm/asm5.go
+++ b/src/cmd/internal/obj/arm/asm5.go
@@ -86,16 +86,22 @@ var optab = []Optab{
{obj.ATEXT, C_ADDR, C_NONE, C_TEXTSIZE, 0, 0, 0, 0, 0},
{AADD, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0},
{AADD, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
+ {AAND, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0},
+ {AAND, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
{AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
{AMVN, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
{ACMP, C_REG, C_REG, C_NONE, 1, 4, 0, 0, 0},
{AADD, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0},
{AADD, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
+ {AAND, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0},
+ {AAND, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
{AMOVW, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
{AMVN, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0},
{ACMP, C_RCON, C_REG, C_NONE, 2, 4, 0, 0, 0},
{AADD, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0},
{AADD, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
+ {AAND, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0},
+ {AAND, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
{AMVN, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
{ACMP, C_SHIFT, C_REG, C_NONE, 3, 4, 0, 0, 0},
{AMOVW, C_RACON, C_NONE, C_REG, 4, 4, REGSP, 0, 0},
@@ -128,14 +134,22 @@ var optab = []Optab{
{AMOVW, C_LCONADDR, C_NONE, C_REG, 12, 4, 0, LFROM | LPCREL, 4},
{AADD, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0},
{AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
+ {AAND, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0},
+ {AAND, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
{AMVN, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
{ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
{AADD, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0},
{AADD, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
+ {AAND, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0},
+ {AAND, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
{AMVN, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
{ACMP, C_SCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
+ {AADD, C_RCON2, C_REG, C_REG, 106, 8, 0, 0, 0},
+ // TODO: RCON2: how to do AND and BIC?
{AADD, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0},
{AADD, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
+ {AAND, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0},
+ {AAND, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
{AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
{ACMP, C_LCON, C_REG, C_NONE, 13, 8, 0, LFROM, 0},
{AMOVB, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
@@ -957,6 +971,21 @@ func immrot(v uint32) int32 {
return 0
}
+// immrot2 returns bits encoding the immediate constant fields of two instructions,
+// such that the encoded constants x, y satisfy x|y==v, x&y==0.
+// Returns 0,0 if no such decomposition of v exists.
+func immrot2(v uint32) (uint32, uint32) {
+ for i := uint(1); i < 32; i++ {
+ m := uint32(1<<i - 1)
+ if x, y := immrot(v&m), immrot(v&^m); x != 0 && y != 0 {
+ return uint32(x), uint32(y)
+ }
+ }
+ // TODO: handle some more cases, like where
+ // the wraparound from the rotate could help.
+ return 0, 0
+}
+
func immaddr(v int32) int32 {
if v >= 0 && v <= 0xfff {
return v&0xfff | 1<<24 | 1<<23 /* pre indexing */ /* pre indexing, up */
@@ -1131,6 +1160,9 @@ func (c *ctxt5) aclass(a *obj.Addr) int {
if uint32(c.instoffset) <= 0xffff && objabi.GOARM == 7 {
return C_SCON
}
+ if x, y := immrot2(uint32(c.instoffset)); x != 0 && y != 0 {
+ return C_RCON2
+ }
return C_LCON
case obj.NAME_EXTERN,
@@ -1195,6 +1227,16 @@ func (c *ctxt5) oplook(p *obj.Prog) *Optab {
a2 = C_REG
}
+ // If Scond != 0, we must use the constant pool instead of
+ // splitting the instruction in two. The most common reason is
+ // .S (flag updating) instructions. There may be others.
+ if a1 == C_RCON2 && p.Scond != 0 {
+ a1 = C_LCON
+ }
+ if a3 == C_RCON2 && p.Scond != 0 {
+ a3 = C_LCON
+ }
+
if false { /*debug['O']*/
fmt.Printf("oplook %v %v %v %v\n", p.As, DRconv(a1), DRconv(a2), DRconv(a3))
fmt.Printf("\t\t%d %d\n", p.From.Type, p.To.Type)
@@ -1225,7 +1267,7 @@ func cmp(a int, b int) bool {
}
switch a {
case C_LCON:
- if b == C_RCON || b == C_NCON || b == C_SCON {
+ if b == C_RCON || b == C_NCON || b == C_SCON || b == C_RCON2 {
return true
}
@@ -1365,7 +1407,6 @@ func buildop(ctxt *obj.Link) {
log.Fatalf("bad code")
case AADD:
- opset(AAND, r0)
opset(AEOR, r0)
opset(ASUB, r0)
opset(ARSB, r0)
@@ -1373,6 +1414,9 @@ func buildop(ctxt *obj.Link) {
opset(ASBC, r0)
opset(ARSC, r0)
opset(AORR, r0)
+
+ case AAND:
+ opset(AAND, r0)
opset(ABIC, r0)
case ACMP:
@@ -1563,6 +1607,33 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) {
}
o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12
+ case 106: /* op $I,R,R where I can be decomposed into 2 immediates */
+ c.aclass(&p.From)
+ r := int(p.Reg)
+ rt := int(p.To.Reg)
+ x, y := immrot2(uint32(c.instoffset))
+ var as2 obj.As
+ switch p.As {
+ case AADD, ASUB, AORR, AEOR:
+ as2 = p.As // ADD, SUB, ORR, EOR
+ case ARSB:
+ as2 = AADD // RSB -> RSB/ADD pair
+ case AADC:
+ as2 = AADD // ADC -> ADC/ADD pair
+ case ASBC:
+ as2 = ASUB // SBC -> SBC/SUB pair
+ case ARSC:
+ as2 = AADD // RSC -> RSC/ADD pair
+ default:
+ c.ctxt.Diag("unknown second op for %v", p)
+ }
+ o1 = c.oprrr(p, p.As, int(p.Scond))
+ o2 = c.oprrr(p, as2, int(p.Scond))
+ o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12
+ o2 |= (uint32(rt)&15)<<16 | (uint32(rt)&15)<<12
+ o1 |= x
+ o2 |= y
+
case 3: /* add R<<[IR],[R],R */
o1 = c.mov(p)