aboutsummaryrefslogtreecommitdiff
path: root/src/cmd
diff options
context:
space:
mode:
author Russ Cox <rsc@golang.org> 2012-06-01 10:23:15 -0400
committer Russ Cox <rsc@golang.org> 2012-06-01 10:23:15 -0400
commit c48ce6930ffcab5d4beaf9654e276bb132a2b66c (patch)
tree 4b50beec204a58b0e7134b5e35e1db414722094e /src/cmd
parent 96b0594833f183ef41b393af3ddced8457f9e6ef (diff)
download go-c48ce6930ffcab5d4beaf9654e276bb132a2b66c.tar.xz
cmd/6l: loop alignment, disabled
Saving the code in case we improve things enough that it matters later, but at least right now it is not worth doing. R=ken2 CC=golang-dev https://golang.org/cl/6248071
Diffstat (limited to 'src/cmd')
-rw-r--r-- src/cmd/6l/l.h 17
-rw-r--r-- src/cmd/6l/span.c 45
2 files changed, 61 insertions, 1 deletion
diff --git a/src/cmd/6l/l.h b/src/cmd/6l/l.h
index b1611e016a..4e271c31fe 100644
--- a/src/cmd/6l/l.h
+++ b/src/cmd/6l/l.h
@@ -41,6 +41,23 @@ enum
{
thechar = '6',
PtrSize = 8,
+
+ // Loop alignment constants:
+ // want to align loop entry to LoopAlign-byte boundary,
+ // and willing to insert at most MaxLoopPad bytes of NOP to do so.
+ // We define a loop entry as the target of a backward jump.
+ //
+ // gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
+ // and it aligns all jump targets, not just backward jump targets.
+ //
+ // As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
+ // is very slight but negative, so the alignment is disabled by
+ // setting MaxLoopPad = 0. The code is here for reference and
+ // for future experiments.
+ //
+ LoopAlign = 16,
+ MaxLoopPad = 0,
+
FuncAlign = 16
};
diff --git a/src/cmd/6l/span.c b/src/cmd/6l/span.c
index 28eb38f404..60916c0412 100644
--- a/src/cmd/6l/span.c
+++ b/src/cmd/6l/span.c
@@ -37,6 +37,37 @@ static int rexflag;
static int asmode;
static vlong vaddr(Adr*, Reloc*);
+// single-instruction no-ops of various lengths: row i holds the encoding of an (i+1)-byte no-op, for lengths 1 through 10.
+// constructed by hand and disassembled with gdb to verify.
+// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
+static uchar nop[][16] = { // each row is 16 bytes wide; bytes past the listed encoding are zero-initialized padding
+ {0x90}, // 1 byte
+ {0x66, 0x90}, // 2 bytes
+ {0x0F, 0x1F, 0x00}, // 3 bytes
+ {0x0F, 0x1F, 0x40, 0x00}, // 4 bytes
+ {0x0F, 0x1F, 0x44, 0x00, 0x00}, // 5 bytes
+ {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, // 6 bytes
+ {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, // 7 bytes
+ {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, // 8 bytes
+ {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, // 9 bytes
+ {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, // 10 bytes
+};
+// fillnop overwrites the n bytes starting at p with no-op instructions, greedily using the longest encoding in nop that fits; it writes nothing when n <= 0, and the caller must provide n writable bytes at p.
+static void
+fillnop(uchar *p, int n)
+{
+ int m;
+
+ while(n > 0) {
+ m = n; // try to cover everything that remains with a single no-op
+ if(m > nelem(nop))
+ m = nelem(nop); // cap at the longest no-op available in the table
+ memmove(p, nop[m-1], m); // nop[m-1] is the m-byte encoding; only its first m bytes are copied
+ p += m;
+ n -= m;
+ }
+}
+
void
span1(Sym *s)
{
@@ -52,8 +83,10 @@ span1(Sym *s)
for(p = s->text; p != P; p = p->link) {
p->back = 2; // use short branches first time through
- if((q = p->pcond) != P && (q->back & 2))
+ if((q = p->pcond) != P && (q->back & 2)) {
p->back |= 1; // backward jump
+ q->back |= 4; // loop head
+ }
if(p->as == AADJSP) {
p->to.type = D_SP;
@@ -78,6 +111,16 @@ span1(Sym *s)
s->np = 0;
c = 0;
for(p = s->text; p != P; p = p->link) {
+ if((p->back & 4) && (c&(LoopAlign-1)) != 0) {
+ // pad with NOPs
+ v = -c&(LoopAlign-1);
+ if(v <= MaxLoopPad) {
+ symgrow(s, c+v);
+ fillnop(s->p+c, v);
+ c += v;
+ }
+ }
+
p->pc = c;
// process forward jumps to p