diff options
| author | Keith Randall <khr@golang.org> | 2014-07-17 14:41:46 -0700 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2014-07-17 14:41:46 -0700 |
| commit | f378f300345431204d5842751db2add7994d9957 (patch) | |
| tree | dc343a417545556013848a33fd49797d2197582e /src/pkg/runtime/stack.c | |
| parent | 6b2aabeecc1db46d030b9c5c5553c4e0fabba0cf (diff) | |
| download | go-f378f300345431204d5842751db2add7994d9957.tar.xz | |
undo CL 101570044 / 2c57aaea79c4
redo stack allocation. This is mostly the same as
the original CL with a few bug fixes.
1. add racemalloc() for stack allocations
2. fix poolalloc/poolfree to terminate free lists correctly.
3. adjust span ref count correctly.
4. don't use cache for sizes >= StackCacheSize.
Should fix bugs and memory leaks in original changelist.
««« original CL description
undo CL 104200047 / 318b04f28372
Breaks windows and race detector.
TBR=rsc
««« original CL description
runtime: stack allocator, separate from mallocgc
In order to move malloc to Go, we need to have a
separate stack allocator. If we run out of stack
during malloc, malloc will not be available
to allocate a new stack.
Stacks are the last remaining FlagNoGC objects in the
GC heap. Once they are out, we can get rid of the
distinction between the allocated/blockboundary bits.
(This will be in a separate change.)
Fixes #7468
Fixes #7424
LGTM=rsc, dvyukov
R=golang-codereviews, dvyukov, khr, dave, rsc
CC=golang-codereviews
https://golang.org/cl/104200047
»»»
TBR=rsc
CC=golang-codereviews
https://golang.org/cl/101570044
»»»
LGTM=dvyukov
R=dvyukov, dave, khr, alex.brainman
CC=golang-codereviews
https://golang.org/cl/112240044
Diffstat (limited to 'src/pkg/runtime/stack.c')
| -rw-r--r-- | src/pkg/runtime/stack.c | 377 |
1 file changed, 241 insertions, 136 deletions
diff --git a/src/pkg/runtime/stack.c b/src/pkg/runtime/stack.c index a07042111e..1a502bd6e7 100644 --- a/src/pkg/runtime/stack.c +++ b/src/pkg/runtime/stack.c @@ -9,6 +9,7 @@ #include "funcdata.h" #include "typekind.h" #include "type.h" +#include "race.h" #include "../../cmd/ld/textflag.h" enum @@ -21,76 +22,176 @@ enum StackDebug = 0, StackFromSystem = 0, // allocate stacks from system memory instead of the heap StackFaultOnFree = 0, // old stacks are mapped noaccess to detect use after free + + StackCache = 1, }; -typedef struct StackCacheNode StackCacheNode; -struct StackCacheNode +// Global pool of spans that have free stacks. +// Stacks are assigned an order according to size. +// order = log_2(size/FixedStack) +// There is a free list for each order. +static MSpan stackpool[NumStackOrders]; +static Lock stackpoolmu; +// TODO: one lock per order? + +void +runtime·stackinit(void) { - StackCacheNode *next; - void* batch[StackCacheBatch-1]; -}; + int32 i; -static StackCacheNode *stackcache; -static Lock stackcachemu; + if((StackCacheSize & PageMask) != 0) + runtime·throw("cache size must be a multiple of page size"); + + for(i = 0; i < NumStackOrders; i++) + runtime·MSpanList_Init(&stackpool[i]); +} + +// Allocates a stack from the free pool. Must be called with +// stackpoolmu held. +static MLink* +poolalloc(uint8 order) +{ + MSpan *list; + MSpan *s; + MLink *x; + uintptr i; -// stackcacherefill/stackcacherelease implement a global cache of stack segments. -// The cache is required to prevent unlimited growth of per-thread caches. + list = &stackpool[order]; + s = list->next; + if(s == list) { + // no free stacks. Allocate another span worth. 
+ s = runtime·MHeap_AllocStack(&runtime·mheap, StackCacheSize >> PageShift); + if(s == nil) + runtime·throw("out of memory"); + if(s->ref != 0) + runtime·throw("bad ref"); + if(s->freelist != nil) + runtime·throw("bad freelist"); + for(i = 0; i < StackCacheSize; i += FixedStack << order) { + x = (MLink*)((s->start << PageShift) + i); + x->next = s->freelist; + s->freelist = x; + } + runtime·MSpanList_Insert(list, s); + } + x = s->freelist; + if(x == nil) + runtime·throw("span has no free stacks"); + s->freelist = x->next; + s->ref++; + if(s->freelist == nil) { + // all stacks in s are allocated. + runtime·MSpanList_Remove(s); + } + return x; +} + +// Adds stack x to the free pool. Must be called with stackpoolmu held. static void -stackcacherefill(void) +poolfree(MLink *x, uint8 order) { - StackCacheNode *n; - int32 i, pos; + MSpan *s; - runtime·lock(&stackcachemu); - n = stackcache; - if(n) - stackcache = n->next; - runtime·unlock(&stackcachemu); - if(n == nil) { - n = (StackCacheNode*)runtime·SysAlloc(FixedStack*StackCacheBatch, &mstats.stacks_sys); - if(n == nil) - runtime·throw("out of memory (stackcacherefill)"); - for(i = 0; i < StackCacheBatch-1; i++) - n->batch[i] = (byte*)n + (i+1)*FixedStack; + s = runtime·MHeap_Lookup(&runtime·mheap, x); + if(s->state != MSpanStack) + runtime·throw("freeing stack not in a stack span"); + if(s->freelist == nil) { + // s will now have a free stack + runtime·MSpanList_Insert(&stackpool[order], s); + } + x->next = s->freelist; + s->freelist = x; + s->ref--; + if(s->ref == 0) { + // span is completely free - return to heap + runtime·MSpanList_Remove(s); + s->freelist = nil; + runtime·MHeap_FreeStack(&runtime·mheap, s); } - pos = g->m->stackcachepos; - for(i = 0; i < StackCacheBatch-1; i++) { - g->m->stackcache[pos] = n->batch[i]; - pos = (pos + 1) % StackCacheSize; +} + +// stackcacherefill/stackcacherelease implement a global pool of stack segments. +// The pool is required to prevent unlimited growth of per-thread caches. 
+static void +stackcacherefill(MCache *c, uint8 order) +{ + MLink *x, *list; + uintptr size; + + if(StackDebug >= 1) + runtime·printf("stackcacherefill order=%d\n", order); + + // Grab some stacks from the global cache. + // Grab half of the allowed capacity (to prevent thrashing). + list = nil; + size = 0; + runtime·lock(&stackpoolmu); + while(size < StackCacheSize/2) { + x = poolalloc(order); + x->next = list; + list = x; + size += FixedStack << order; } - g->m->stackcache[pos] = n; - pos = (pos + 1) % StackCacheSize; - g->m->stackcachepos = pos; - g->m->stackcachecnt += StackCacheBatch; + runtime·unlock(&stackpoolmu); + + c->stackcache[order].list = list; + c->stackcache[order].size = size; } static void -stackcacherelease(void) +stackcacherelease(MCache *c, uint8 order) { - StackCacheNode *n; - uint32 i, pos; + MLink *x, *y; + uintptr size; - pos = (g->m->stackcachepos - g->m->stackcachecnt) % StackCacheSize; - n = (StackCacheNode*)g->m->stackcache[pos]; - pos = (pos + 1) % StackCacheSize; - for(i = 0; i < StackCacheBatch-1; i++) { - n->batch[i] = g->m->stackcache[pos]; - pos = (pos + 1) % StackCacheSize; + if(StackDebug >= 1) + runtime·printf("stackcacherelease order=%d\n", order); + x = c->stackcache[order].list; + size = c->stackcache[order].size; + runtime·lock(&stackpoolmu); + while(size > StackCacheSize/2) { + y = x->next; + poolfree(x, order); + x = y; + size -= FixedStack << order; } - g->m->stackcachecnt -= StackCacheBatch; - runtime·lock(&stackcachemu); - n->next = stackcache; - stackcache = n; - runtime·unlock(&stackcachemu); + runtime·unlock(&stackpoolmu); + c->stackcache[order].list = x; + c->stackcache[order].size = size; +} + +void +runtime·stackcache_clear(MCache *c) +{ + uint8 order; + MLink *x, *y; + + if(StackDebug >= 1) + runtime·printf("stackcache clear\n"); + runtime·lock(&stackpoolmu); + for(order = 0; order < NumStackOrders; order++) { + x = c->stackcache[order].list; + while(x != nil) { + y = x->next; + poolfree(x, order); + x = y; + } 
+ c->stackcache[order].list = nil; + c->stackcache[order].size = 0; + } + runtime·unlock(&stackpoolmu); } void* runtime·stackalloc(G *gp, uint32 n) { - uint32 pos; + uint8 order; + uint32 n2; void *v; - bool malloced; Stktop *top; + MLink *x; + MSpan *s; + MCache *c; // Stackalloc must be called on scheduler stack, so that we // never try to grow the stack during the code that stackalloc runs. @@ -110,41 +211,60 @@ runtime·stackalloc(G *gp, uint32 n) return v; } - // Minimum-sized stacks are allocated with a fixed-size free-list allocator, - // but if we need a stack of a bigger size, we fall back on malloc - // (assuming that inside malloc all the stack frames are small, - // so that we do not deadlock). - malloced = true; - if(n == FixedStack || g->m->mallocing) { - if(n != FixedStack) { - runtime·printf("stackalloc: in malloc, size=%d want %d\n", FixedStack, n); - runtime·throw("stackalloc"); + // Small stacks are allocated with a fixed-size free-list allocator. + // If we need a stack of a bigger size, we fall back on allocating + // a dedicated span. + if(StackCache && n < FixedStack << NumStackOrders && n < StackCacheSize) { + order = 0; + n2 = n; + while(n2 > FixedStack) { + order++; + n2 >>= 1; } - if(g->m->stackcachecnt == 0) - stackcacherefill(); - pos = g->m->stackcachepos; - pos = (pos - 1) % StackCacheSize; - v = g->m->stackcache[pos]; - g->m->stackcachepos = pos; - g->m->stackcachecnt--; - g->m->stackinuse++; - malloced = false; - } else - v = runtime·mallocgc(n, 0, FlagNoProfiling|FlagNoGC|FlagNoZero|FlagNoInvokeGC); - + c = g->m->mcache; + if(c == nil) { + // This can happen in the guts of exitsyscall or + // procresize. Just get a stack from the global pool. 
+ runtime·lock(&stackpoolmu); + x = poolalloc(order); + runtime·unlock(&stackpoolmu); + } else { + x = c->stackcache[order].list; + if(x == nil) { + stackcacherefill(c, order); + x = c->stackcache[order].list; + } + c->stackcache[order].list = x->next; + c->stackcache[order].size -= n; + } + v = (byte*)x; + } else { + s = runtime·MHeap_AllocStack(&runtime·mheap, ROUND(n, PageSize) >> PageShift); + if(s == nil) + runtime·throw("out of memory"); + v = (byte*)(s->start<<PageShift); + } top = (Stktop*)((byte*)v+n-sizeof(Stktop)); runtime·memclr((byte*)top, sizeof(*top)); - top->malloced = malloced; + if(raceenabled) + runtime·racemalloc(v, n); + if(StackDebug >= 1) + runtime·printf(" allocated %p\n", v); return v; } void runtime·stackfree(G *gp, void *v, Stktop *top) { - uint32 pos; - uintptr n; - + uint8 order; + uintptr n, n2; + MSpan *s; + MLink *x; + MCache *c; + n = (uintptr)(top+1) - (uintptr)v; + if(n & (n-1)) + runtime·throw("stack not a power of 2"); if(StackDebug >= 1) runtime·printf("stackfree %p %d\n", v, (int32)n); gp->stacksize -= n; @@ -155,19 +275,34 @@ runtime·stackfree(G *gp, void *v, Stktop *top) runtime·SysFree(v, n, &mstats.stacks_sys); return; } - if(top->malloced) { - runtime·free(v); - return; + if(StackCache && n < FixedStack << NumStackOrders && n < StackCacheSize) { + order = 0; + n2 = n; + while(n2 > FixedStack) { + order++; + n2 >>= 1; + } + x = (MLink*)v; + c = g->m->mcache; + if(c == nil) { + runtime·lock(&stackpoolmu); + poolfree(x, order); + runtime·unlock(&stackpoolmu); + } else { + if(c->stackcache[order].size >= StackCacheSize) + stackcacherelease(c, order); + x->next = c->stackcache[order].list; + c->stackcache[order].list = x; + c->stackcache[order].size += n; + } + } else { + s = runtime·MHeap_Lookup(&runtime·mheap, v); + if(s->state != MSpanStack) { + runtime·printf("%p %p\n", s->start<<PageShift, v); + runtime·throw("bad span state"); + } + runtime·MHeap_FreeStack(&runtime·mheap, s); } - if(n != FixedStack) - 
runtime·throw("stackfree: bad fixed size"); - if(g->m->stackcachecnt == StackCacheSize) - stackcacherelease(); - pos = g->m->stackcachepos; - g->m->stackcache[pos] = v; - g->m->stackcachepos = (pos + 1) % StackCacheSize; - g->m->stackcachecnt++; - g->m->stackinuse--; } // Called from runtime·lessstack when returning from a function which @@ -186,6 +321,8 @@ runtime·oldstack(void) gp = g->m->curg; top = (Stktop*)gp->stackbase; + if(top == nil) + runtime·throw("nil stackbase"); old = (byte*)gp->stackguard - StackGuard; sp = (byte*)top; argsize = top->argsize; @@ -324,6 +461,8 @@ copyabletopsegment(G *gp) FuncVal *fn; StackMap *stackmap; + if(gp->stackbase == 0) + runtime·throw("stackbase == 0"); cinfo.stk = (byte*)gp->stackguard - StackGuard; cinfo.base = (byte*)gp->stackbase + sizeof(Stktop); cinfo.frames = 0; @@ -599,11 +738,12 @@ copystack(G *gp, uintptr nframes, uintptr newsize) uintptr oldsize, used; AdjustInfo adjinfo; Stktop *oldtop, *newtop; - bool malloced; if(gp->syscallstack != 0) runtime·throw("can't handle stack copy in syscall yet"); oldstk = (byte*)gp->stackguard - StackGuard; + if(gp->stackbase == 0) + runtime·throw("nil stackbase"); oldbase = (byte*)gp->stackbase + sizeof(Stktop); oldsize = oldbase - oldstk; used = oldbase - (byte*)gp->sched.sp; @@ -613,10 +753,9 @@ copystack(G *gp, uintptr nframes, uintptr newsize) newstk = runtime·stackalloc(gp, newsize); newbase = newstk + newsize; newtop = (Stktop*)(newbase - sizeof(Stktop)); - malloced = newtop->malloced; if(StackDebug >= 1) - runtime·printf("copystack [%p %p]/%d -> [%p %p]/%d\n", oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize); + runtime·printf("copystack gp=%p [%p %p]/%d -> [%p %p]/%d\n", gp, oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize); USED(oldsize); // adjust pointers in the to-be-copied frames @@ -631,7 +770,6 @@ copystack(G *gp, uintptr nframes, uintptr newsize) // copy the stack (including Stktop) to the new location runtime·memmove(newbase - used, 
oldbase - used, used); - newtop->malloced = malloced; // Swap out old stack for new one gp->stackbase = (uintptr)newtop; @@ -707,6 +845,8 @@ runtime·newstack(void) if(!newstackcall) runtime·rewindmorestack(&gp->sched); + if(gp->stackbase == 0) + runtime·throw("nil stackbase"); sp = gp->sched.sp; if(thechar == '6' || thechar == '8') { // The call to morestack cost a word. @@ -792,7 +932,7 @@ runtime·newstack(void) top = (Stktop*)(stk+framesize-sizeof(*top)); if(StackDebug >= 1) { - runtime·printf("\t-> new stack [%p, %p]\n", stk, top); + runtime·printf("\t-> new stack gp=%p [%p, %p]\n", gp, stk, top); } top->stackbase = gp->stackbase; @@ -881,10 +1021,14 @@ runtime·shrinkstack(G *gp) int32 nframes; byte *oldstk, *oldbase; uintptr used, oldsize, newsize; - MSpan *span; if(!runtime·copystack) return; + if(gp->status == Gdead) + return; + if(gp->stackbase == 0) + runtime·throw("stackbase == 0"); + //return; // TODO: why does this happen? oldstk = (byte*)gp->stackguard - StackGuard; oldbase = (byte*)gp->stackbase + sizeof(Stktop); oldsize = oldbase - oldstk; @@ -895,53 +1039,14 @@ runtime·shrinkstack(G *gp) if(used >= oldsize / 4) return; // still using at least 1/4 of the segment. - // To shrink to less than 1/2 a page, we need to copy. - if(newsize < PageSize/2) { - if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case? - return; + if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case? + return; #ifdef GOOS_windows - if(gp->m != nil && gp->m->libcallsp != 0) - return; -#endif - nframes = copyabletopsegment(gp); - if(nframes == -1) - return; - copystack(gp, nframes, newsize); + if(gp->m != nil && gp->m->libcallsp != 0) return; - } - - // To shrink a stack of one page size or more, we can shrink it - // without copying. Just deallocate the lower half. - span = runtime·MHeap_LookupMaybe(&runtime·mheap, oldstk); - if(span == nil) - return; // stack allocated outside heap. Can't shrink it. 
Can happen if stack is allocated while inside malloc. TODO: shrink by copying? - if(span->elemsize != oldsize) - runtime·throw("span element size doesn't match stack size"); - if((uintptr)oldstk != span->start << PageShift) - runtime·throw("stack not at start of span"); - - if(StackDebug) - runtime·printf("shrinking stack in place %p %X->%X\n", oldstk, oldsize, newsize); - - // new stack guard for smaller stack - gp->stackguard = (uintptr)oldstk + newsize + StackGuard; - gp->stackguard0 = (uintptr)oldstk + newsize + StackGuard; - if(gp->stack0 == (uintptr)oldstk) - gp->stack0 = (uintptr)oldstk + newsize; - gp->stacksize -= oldsize - newsize; - - // Free bottom half of the stack. - if(runtime·debug.efence || StackFromSystem) { - if(runtime·debug.efence || StackFaultOnFree) - runtime·SysFault(oldstk, newsize); - else - runtime·SysFree(oldstk, newsize, &mstats.stacks_sys); +#endif + nframes = copyabletopsegment(gp); + if(nframes == -1) return; - } - // First, we trick malloc into thinking - // we allocated the stack as two separate half-size allocs. Then the - // free() call does the rest of the work for us. - runtime·MSpan_EnsureSwept(span); - runtime·MHeap_SplitSpan(&runtime·mheap, span); - runtime·free(oldstk); + copystack(gp, nframes, newsize); } |
