diff options
| author | Keith Randall <khr@golang.org> | 2014-02-26 23:28:44 -0800 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2014-02-26 23:28:44 -0800 |
| commit | 1665b006a57099d7bdf5c9f1277784d36b7168d9 (patch) | |
| tree | b580dcbc40283a9ee36d08d65e9588c65b7d313a /src/pkg/runtime/stack.c | |
| parent | e5f01aee04dc6313c85dab78305adf499e1f7bfa (diff) | |
| download | go-1665b006a57099d7bdf5c9f1277784d36b7168d9.tar.xz | |
runtime: grow stack by copying
On stack overflow, if all frames on the stack are
copyable, we copy the frames to a new stack twice
as large as the old one. During GC, if a G is using
less than 1/4 of its stack, copy the stack to a stack
half its size.
TODO
- Do something about C frames. When a C frame is in the
  stack segment, it isn't copyable. We allocate a new segment
  in this case.
  - For idempotent C code, we can abort it, copy the stack,
    then retry. I'm working on a separate CL for this.
  - For other C code, we can raise the stackguard
    to the lowest Go frame so the next call that Go frame
    makes triggers a copy, which will then succeed.
- Pick a starting stack size?

The plan is that eventually we reach a point where the
stack contains only copyable frames.
LGTM=rsc
R=dvyukov, rsc
CC=golang-codereviews
https://golang.org/cl/54650044
Diffstat (limited to 'src/pkg/runtime/stack.c')
| -rw-r--r-- | src/pkg/runtime/stack.c | 458 |
1 files changed, 426 insertions, 32 deletions
diff --git a/src/pkg/runtime/stack.c b/src/pkg/runtime/stack.c index 59441db4c1..85885e80f9 100644 --- a/src/pkg/runtime/stack.c +++ b/src/pkg/runtime/stack.c @@ -6,10 +6,20 @@ #include "arch_GOARCH.h" #include "malloc.h" #include "stack.h" +#include "funcdata.h" +#include "typekind.h" +#include "type.h" enum { + // StackDebug == 0: no logging + // == 1: logging of per-stack operations + // == 2: logging of per-frame operations + // == 3: logging of per-word updates + // == 4: logging of per-word reads StackDebug = 0, + StackFromSystem = 0, // allocate stacks from system memory instead of the heap + StackFaultOnFree = 0, // old stacks are mapped noaccess to detect use after free }; typedef struct StackCacheNode StackCacheNode; @@ -84,12 +94,17 @@ runtime·stackalloc(uint32 n) // Doing so would cause a deadlock (issue 1547). if(g != m->g0) runtime·throw("stackalloc not on scheduler stack"); + if(StackDebug >= 1) + runtime·printf("stackalloc %d\n", n); - // Stacks are usually allocated with a fixed-size free-list allocator, - // but if we need a stack of non-standard size, we fall back on malloc - // (assuming that inside malloc and GC all the stack frames are small, + if(StackFromSystem) + return runtime·SysAlloc(ROUND(n, PageSize), &mstats.stacks_sys); + + // Minimum-sized stacks are allocated with a fixed-size free-list allocator, + // but if we need a stack of a bigger size, we fall back on malloc + // (assuming that inside malloc all the stack frames are small, // so that we do not deadlock). 
- if(n == FixedStack || m->mallocing || m->gcing) { + if(n == FixedStack || m->mallocing) { if(n != FixedStack) { runtime·printf("stackalloc: in malloc, size=%d want %d\n", FixedStack, n); runtime·throw("stackalloc"); @@ -112,6 +127,16 @@ runtime·stackfree(void *v, uintptr n) { uint32 pos; + if(StackDebug >= 1) + runtime·printf("stackfree %p %d\n", v, (int32)n); + if(StackFromSystem) { + if(StackFaultOnFree) + runtime·SysFault(v, n); + else + runtime·SysFree(v, n, &mstats.stacks_sys); + return; + } + if(n == FixedStack || m->mallocing || m->gcing) { if(m->stackcachecnt == StackCacheSize) stackcacherelease(); @@ -145,7 +170,7 @@ runtime·oldstack(void) sp = (byte*)top; argsize = top->argsize; - if(StackDebug) { + if(StackDebug >= 1) { runtime·printf("runtime: oldstack gobuf={pc:%p sp:%p lr:%p} cret=%p argsize=%p\n", top->gobuf.pc, top->gobuf.sp, top->gobuf.lr, m->cret, (uintptr)argsize); } @@ -187,6 +212,330 @@ runtime·oldstack(void) uintptr runtime·maxstacksize = 1<<20; // enough until runtime.main sets it for real +static uint8* +mapnames[] = { + (uint8*)"---", + (uint8*)"ptr", + (uint8*)"iface", + (uint8*)"eface", +}; + +// Stack frame layout +// +// (x86) +// +------------------+ +// | args from caller | +// +------------------+ <- frame->argp +// | return address | +// +------------------+ <- frame->varp +// | locals | +// +------------------+ +// | args to callee | +// +------------------+ <- frame->sp +// +// (arm: TODO) + +typedef struct CopyableInfo CopyableInfo; +struct CopyableInfo { + byte *stk; // bottom address of segment + byte *base; // top address of segment (including Stktop) + int32 frames; // count of copyable frames (-1 = not copyable) +}; + +void runtime·main(void); + +static bool +checkframecopy(Stkframe *frame, void *arg) +{ + CopyableInfo *cinfo; + Func *f; + StackMap *stackmap; + + cinfo = arg; + f = frame->fn; + if(StackDebug >= 2) + runtime·printf(" checking %s frame=[%p,%p] stk=[%p,%p]\n", runtime·funcname(f), frame->sp, frame->fp, 
cinfo->stk, cinfo->base); + // if we're not in the segment any more, return immediately. + if(frame->varp < cinfo->stk || frame->varp >= cinfo->base) { + if(StackDebug >= 2) + runtime·printf(" <next segment>\n"); + return false; // stop traceback + } + if(f->entry == (uintptr)runtime·main) { + // A special routine at the TOS of the main routine. + // We will allow it to be copied even though we don't + // have full GC info for it (because it is written in C). + cinfo->frames++; + return false; // stop traceback + } + if(frame->varp != (byte*)frame->sp) { // not in prologue (and has at least one local or outarg) + stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps); + if(stackmap == nil) { + cinfo->frames = -1; + if(StackDebug >= 1) + runtime·printf("copystack: no locals info for %s\n", runtime·funcname(f)); + return false; + } + if(stackmap->n <= 0) { + cinfo->frames = -1; + if(StackDebug >= 1) + runtime·printf("copystack: locals size info only for %s\n", runtime·funcname(f)); + return false; + } + } + if(frame->arglen != 0) { + stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps); + if(stackmap == nil) { + cinfo->frames = -1; + if(StackDebug >= 1) + runtime·printf("copystack: no arg info for %s\n", runtime·funcname(f)); + return false; + } + } + cinfo->frames++; + return true; // this frame is ok; keep going +} + +// If the top segment of the stack contains an uncopyable +// frame, return -1. Otherwise return the number of frames +// in the top segment, all of which are copyable. 
+static int32 +copyabletopsegment(G *gp) +{ + CopyableInfo cinfo; + + cinfo.stk = (byte*)gp->stackguard - StackGuard; + cinfo.base = (byte*)gp->stackbase + sizeof(Stktop); + cinfo.frames = 0; + runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, 0x7fffffff, checkframecopy, &cinfo, false); + if(StackDebug >= 1 && cinfo.frames != -1) + runtime·printf("copystack: %d copyable frames\n", cinfo.frames); + return cinfo.frames; +} + +typedef struct AdjustInfo AdjustInfo; +struct AdjustInfo { + byte *oldstk; // bottom address of segment + byte *oldbase; // top address of segment (after Stktop) + uintptr delta; // ptr distance from old to new stack (newbase - oldbase) +}; + +// bv describes the memory starting at address scanp. +// Adjust any pointers contained therein. +static void +adjustpointers(byte **scanp, BitVector *bv, AdjustInfo *adjinfo, Func *f) +{ + uintptr delta; + int32 num, i; + byte *p, *minp, *maxp; + Type *t; + Itab *tab; + + minp = adjinfo->oldstk; + maxp = adjinfo->oldbase; + delta = adjinfo->delta; + num = bv->n / BitsPerPointer; + for(i = 0; i < num; i++) { + if(StackDebug >= 4) + runtime·printf(" %p:%s:%p\n", &scanp[i], mapnames[bv->data[i / (32 / BitsPerPointer)] >> (i * BitsPerPointer & 31) & 3], scanp[i]); + switch(bv->data[i / (32 / BitsPerPointer)] >> (i * BitsPerPointer & 31) & 3) { + case BitsNoPointer: + break; + case BitsPointer: + p = scanp[i]; + if(f != nil && (byte*)0 < p && p < (byte*)PageSize) { + // Looks like a junk value in a pointer slot. + // Live analysis wrong? 
+ runtime·printf("%p: %p %s\n", &scanp[i], p, runtime·funcname(f)); + runtime·throw("bad pointer!"); + } + if(minp <= p && p < maxp) { + if(StackDebug >= 3) + runtime·printf("adjust ptr %p\n", p); + scanp[i] = p + delta; + } + break; + case BitsEface: + t = (Type*)scanp[i]; + if(t != nil && (t->size > PtrSize || (t->kind & KindNoPointers) == 0)) { + p = scanp[i+1]; + if(minp <= p && p < maxp) { + if(StackDebug >= 3) + runtime·printf("adjust eface %p\n", p); + if(t->size > PtrSize) // currently we always allocate such objects on the heap + runtime·throw("large interface value found on stack"); + scanp[i+1] = p + delta; + } + } + break; + case BitsIface: + tab = (Itab*)scanp[i]; + if(tab != nil) { + t = tab->type; + if(t->size > PtrSize || (t->kind & KindNoPointers) == 0) { + p = scanp[i+1]; + if(minp <= p && p < maxp) { + if(StackDebug >= 3) + runtime·printf("adjust iface %p\n", p); + if(t->size > PtrSize) // currently we always allocate such objects on the heap + runtime·throw("large interface value found on stack"); + scanp[i+1] = p + delta; + } + } + } + break; + } + } +} + +// Note: the argument/return area is adjusted by the callee. 
+static bool +adjustframe(Stkframe *frame, void *arg) +{ + AdjustInfo *adjinfo; + Func *f; + StackMap *stackmap; + int32 pcdata; + BitVector *bv; + + adjinfo = arg; + f = frame->fn; + if(StackDebug >= 2) + runtime·printf(" adjusting %s frame=[%p,%p]\n", runtime·funcname(f), frame->sp, frame->fp); + if(f->entry == (uintptr)runtime·main) + return true; + pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, frame->pc); + if(pcdata == -1) + pcdata = 0; // in prologue + + // adjust local pointers + if(frame->varp != (byte*)frame->sp) { + stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps); + if(stackmap == nil) + runtime·throw("no locals info"); + if(stackmap->n <= 0) + runtime·throw("locals size info only"); + bv = runtime·stackmapdata(stackmap, pcdata); + if(StackDebug >= 3) + runtime·printf(" locals\n"); + adjustpointers((byte**)frame->varp - bv->n / BitsPerPointer, bv, adjinfo, f); + } + // adjust inargs and outargs + if(frame->arglen != 0) { + stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps); + if(stackmap == nil) + runtime·throw("no arg info"); + bv = runtime·stackmapdata(stackmap, pcdata); + if(StackDebug >= 3) + runtime·printf(" args\n"); + adjustpointers((byte**)frame->argp, bv, adjinfo, nil); + } + return true; +} + +static void +adjustctxt(G *gp, AdjustInfo *adjinfo) +{ + if(adjinfo->oldstk <= (byte*)gp->sched.ctxt && (byte*)gp->sched.ctxt < adjinfo->oldbase) + gp->sched.ctxt = (byte*)gp->sched.ctxt + adjinfo->delta; +} + +static void +adjustdefers(G *gp, AdjustInfo *adjinfo) +{ + Defer *d, **dp; + Func *f; + FuncVal *fn; + StackMap *stackmap; + BitVector *bv; + + for(dp = &gp->defer, d = *dp; d != nil; dp = &d->link, d = *dp) { + if(adjinfo->oldstk <= (byte*)d && (byte*)d < adjinfo->oldbase) { + // The Defer record is on the stack. Its fields will + // get adjusted appropriately. + // This only happens for runtime.main now, but a compiler + // optimization could do more of this. 
+ *dp = (Defer*)((byte*)d + adjinfo->delta); + continue; + } + if(d->argp < adjinfo->oldstk || adjinfo->oldbase <= d->argp) + break; // a defer for the next segment + f = runtime·findfunc((uintptr)d->fn->fn); + if(f == nil) { + runtime·printf("runtime: bad defer %p %d %d %p %p\n", d->fn->fn, d->siz, d->special, d->argp, d->pc); + runtime·printf("caller %s\n", runtime·funcname(runtime·findfunc((uintptr)d->pc))); + runtime·throw("can't adjust unknown defer"); + } + if(StackDebug >= 4) + runtime·printf(" checking defer %s\n", runtime·funcname(f)); + // Defer's FuncVal might be on the stack + fn = d->fn; + if(adjinfo->oldstk <= (byte*)fn && (byte*)fn < adjinfo->oldbase) { + if(StackDebug >= 3) + runtime·printf(" adjust defer fn %s\n", runtime·funcname(f)); + d->fn = (FuncVal*)((byte*)fn + adjinfo->delta); + } else { + // deferred function's closure args might point into the stack. + if(StackDebug >= 3) + runtime·printf(" adjust deferred args for %s\n", runtime·funcname(f)); + stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps); + if(stackmap == nil) + runtime·throw("runtime: deferred function has no arg ptr map"); + bv = runtime·stackmapdata(stackmap, 0); + adjustpointers(d->args, bv, adjinfo, f); + } + d->argp += adjinfo->delta; + } +} + +// Copies the top stack segment of gp to a new stack segment of a +// different size. The top segment must contain nframes frames. 
+static void +copystack(G *gp, uintptr nframes, uintptr newsize) +{ + byte *oldstk, *oldbase, *newstk, *newbase; + uintptr oldsize, used; + AdjustInfo adjinfo; + + if(gp->syscallstack != 0) + runtime·throw("can't handle stack copy in syscall yet"); + oldstk = (byte*)gp->stackguard - StackGuard; + oldbase = (byte*)gp->stackbase + sizeof(Stktop); + oldsize = oldbase - oldstk; + used = oldbase - (byte*)gp->sched.sp; + + // allocate new stack + newstk = runtime·stackalloc(newsize); + newbase = newstk + newsize; + + if(StackDebug >= 1) + runtime·printf("copystack [%p %p]/%d -> [%p %p]/%d\n", oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize); + + // adjust pointers in the to-be-copied frames + adjinfo.oldstk = oldstk; + adjinfo.oldbase = oldbase; + adjinfo.delta = newbase - oldbase; + runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, nframes, adjustframe, &adjinfo, false); + + // adjust other miscellaneous things that have pointers into stacks. + adjustctxt(gp, &adjinfo); + adjustdefers(gp, &adjinfo); + + // copy the stack to the new location + runtime·memmove(newbase - used, oldbase - used, used); + + // Swap out old stack for new one + gp->stackbase = (uintptr)newbase - sizeof(Stktop); + gp->stackguard = (uintptr)newstk + StackGuard; + gp->stackguard0 = (uintptr)newstk + StackGuard; // NOTE: might clobber a preempt request + if(gp->stack0 == (uintptr)oldstk) + gp->stack0 = (uintptr)newstk; + gp->sched.sp = (uintptr)(newbase - used); + + // free old stack + runtime·stackfree(oldstk, oldsize); +} + // Called from runtime·newstackcall or from runtime·morestack when a new // stack segment is needed. 
Allocate a new stack big enough for // m->moreframesize bytes, copy m->moreargsize bytes to the new frame, @@ -195,9 +544,9 @@ uintptr runtime·maxstacksize = 1<<20; // enough until runtime.main sets it for void runtime·newstack(void) { - int32 framesize, argsize, oldstatus; + int32 framesize, argsize, oldstatus, oldsize, newsize, nframes; Stktop *top, *oldtop; - byte *stk; + byte *stk, *oldstk, *oldbase; uintptr sp; uintptr *src, *dst, *dstend; G *gp; @@ -234,7 +583,7 @@ runtime·newstack(void) // The call to morestack cost a word. sp -= sizeof(uintptr); } - if(StackDebug || sp < gp->stackguard - StackGuard) { + if(StackDebug >= 1 || sp < gp->stackguard - StackGuard) { runtime·printf("runtime: newstack framesize=%p argsize=%p sp=%p stack=[%p, %p]\n" "\tmorebuf={pc:%p sp:%p lr:%p}\n" "\tsched={pc:%p sp:%p lr:%p ctxt:%p}\n", @@ -273,33 +622,47 @@ runtime·newstack(void) runtime·gosched0(gp); // never return } - if(newstackcall && m->morebuf.sp - sizeof(Stktop) - argsize - 32 > gp->stackguard) { - // special case: called from runtime.newstackcall (framesize==1) - // to call code with an arbitrary argument size, - // and we have enough space on the current stack. - // the new Stktop* is necessary to unwind, but - // we don't need to create a new segment. - top = (Stktop*)(m->morebuf.sp - sizeof(*top)); - stk = (byte*)gp->stackguard - StackGuard; - free = 0; - } else { - // allocate new segment. - framesize += argsize; - framesize += StackExtra; // room for more functions, Stktop. - if(framesize < StackMin) - framesize = StackMin; - framesize += StackSystem; - gp->stacksize += framesize; - if(gp->stacksize > runtime·maxstacksize) { - runtime·printf("runtime: goroutine stack exceeds %D-byte limit\n", (uint64)runtime·maxstacksize); - runtime·throw("stack overflow"); + // If every frame on the top segment is copyable, allocate a bigger segment + // and move the segment instead of allocating a new segment. 
+ if(runtime·copystack) { + if(!runtime·precisestack) + runtime·throw("can't copy stacks without precise stacks"); + nframes = copyabletopsegment(gp); + if(nframes != -1) { + oldstk = (byte*)gp->stackguard - StackGuard; + oldbase = (byte*)gp->stackbase + sizeof(Stktop); + oldsize = oldbase - oldstk; + newsize = oldsize * 2; + if(newsize > runtime·maxstacksize) { + runtime·printf("runtime: goroutine stack exceeds %D-byte limit\n", (uint64)runtime·maxstacksize); + runtime·throw("stack overflow"); + } + copystack(gp, nframes, newsize); + if(StackDebug >= 1) + runtime·printf("stack grow done\n"); + runtime·gogo(&gp->sched); } - stk = runtime·stackalloc(framesize); - top = (Stktop*)(stk+framesize-sizeof(*top)); - free = framesize; + // TODO: if stack is uncopyable because we're in C code, patch return value at + // end of C code to trigger a copy as soon as C code exits. That way, we'll + // have stack available if we get this deep again. } - if(StackDebug) { + // allocate new segment. + framesize += argsize; + framesize += StackExtra; // room for more functions, Stktop. + if(framesize < StackMin) + framesize = StackMin; + framesize += StackSystem; + gp->stacksize += framesize; + if(gp->stacksize > runtime·maxstacksize) { + runtime·printf("runtime: goroutine stack exceeds %D-byte limit\n", (uint64)runtime·maxstacksize); + runtime·throw("stack overflow"); + } + stk = runtime·stackalloc(framesize); + top = (Stktop*)(stk+framesize-sizeof(*top)); + free = framesize; + + if(StackDebug >= 1) { runtime·printf("\t-> new stack [%p, %p]\n", stk, top); } @@ -372,3 +735,34 @@ runtime·gostartcallfn(Gobuf *gobuf, FuncVal *fv) { runtime·gostartcall(gobuf, fv->fn, fv); } + +// Maybe shrink the stack being used by gp. +// Called at garbage collection time. +void +runtime·shrinkstack(G *gp) +{ + int32 nframes; + byte *oldstk, *oldbase; + uintptr used, oldsize; + + if(gp->syscallstack != (uintptr)nil) // TODO: handle this case? 
+ return; + + oldstk = (byte*)gp->stackguard - StackGuard; + oldbase = (byte*)gp->stackbase + sizeof(Stktop); + oldsize = oldbase - oldstk; + if(oldsize / 2 < FixedStack) + return; // don't shrink below the minimum-sized stack + used = oldbase - (byte*)gp->sched.sp; + if(used >= oldsize / 4) + return; // still using at least 1/4 of the segment. + + nframes = copyabletopsegment(gp); + if(nframes == -1) + return; // TODO: handle this case. Shrink in place? + + copystack(gp, nframes, oldsize / 2); + + if(StackDebug >= 1) + runtime·printf("stack shrink done\n"); +} |
