diff options
| author | Russ Cox <rsc@golang.org> | 2012-06-01 10:23:15 -0400 |
|---|---|---|
| committer | Russ Cox <rsc@golang.org> | 2012-06-01 10:23:15 -0400 |
| commit | c48ce6930ffcab5d4beaf9654e276bb132a2b66c (patch) | |
| tree | 4b50beec204a58b0e7134b5e35e1db414722094e /src/cmd | |
| parent | 96b0594833f183ef41b393af3ddced8457f9e6ef (diff) | |
| download | go-c48ce6930ffcab5d4beaf9654e276bb132a2b66c.tar.xz | |
cmd/6l: loop alignment, disabled
Saving the code in case we improve things enough that
it matters later, but at least right now it is not worth doing.
R=ken2
CC=golang-dev
https://golang.org/cl/6248071
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/6l/l.h | 17 | ||||
| -rw-r--r-- | src/cmd/6l/span.c | 45 |
2 files changed, 61 insertions, 1 deletions
diff --git a/src/cmd/6l/l.h b/src/cmd/6l/l.h
index b1611e016a..4e271c31fe 100644
--- a/src/cmd/6l/l.h
+++ b/src/cmd/6l/l.h
@@ -41,6 +41,23 @@ enum
 {
 	thechar = '6',
 	PtrSize = 8,
+
+	// Loop alignment constants:
+	// want to align loop entry to LoopAlign-byte boundary,
+	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
+	// We define a loop entry as the target of a backward jump.
+	//
+	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
+	// and it aligns all jump targets, not just backward jump targets.
+	//
+	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
+	// is very slight but negative, so the alignment is disabled by
+	// setting MaxLoopPad = 0. The code is here for reference and
+	// for future experiments.
+	//
+	LoopAlign = 16,
+	MaxLoopPad = 0,
+
 	FuncAlign = 16
 };
 
diff --git a/src/cmd/6l/span.c b/src/cmd/6l/span.c
index 28eb38f404..60916c0412 100644
--- a/src/cmd/6l/span.c
+++ b/src/cmd/6l/span.c
@@ -37,6 +37,37 @@ static int rexflag;
 static int asmode;
 static vlong vaddr(Adr*, Reloc*);
 
+// single-instruction no-ops of various lengths.
+// constructed by hand and disassembled with gdb to verify.
+// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
+static uchar nop[][16] = {
+	{0x90},
+	{0x66, 0x90},
+	{0x0F, 0x1F, 0x00},
+	{0x0F, 0x1F, 0x40, 0x00},
+	{0x0F, 0x1F, 0x44, 0x00, 0x00},
+	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
+	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
+	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+	{0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+};
+
+static void
+fillnop(uchar *p, int n)
+{
+	int m;
+
+	while(n > 0) {
+		m = n;
+		if(m > nelem(nop))
+			m = nelem(nop);
+		memmove(p, nop[m-1], m);
+		p += m;
+		n -= m;
+	}
+}
+
 void
 span1(Sym *s)
 {
@@ -52,8 +83,10 @@ span1(Sym *s)
 
 	for(p = s->text; p != P; p = p->link) {
 		p->back = 2;	// use short branches first time through
-		if((q = p->pcond) != P && (q->back & 2))
+		if((q = p->pcond) != P && (q->back & 2)) {
 			p->back |= 1;	// backward jump
+			q->back |= 4;	// loop head
+		}
 
 		if(p->as == AADJSP) {
 			p->to.type = D_SP;
@@ -78,6 +111,16 @@ span1(Sym *s)
 	s->np = 0;
 	c = 0;
 	for(p = s->text; p != P; p = p->link) {
+		if((p->back & 4) && (c&(LoopAlign-1)) != 0) {
+			// pad with NOPs
+			v = -c&(LoopAlign-1);
+			if(v <= MaxLoopPad) {
+				symgrow(s, c+v);
+				fillnop(s->p+c, v);
+				c += v;
+			}
+		}
+
 		p->pc = c;
 
 		// process forward jumps to p
|
