diff options
Diffstat (limited to 'src/pkg/runtime/stack.c')
| -rw-r--r-- | src/pkg/runtime/stack.c | 346 |
1 files changed, 135 insertions, 211 deletions
diff --git a/src/pkg/runtime/stack.c b/src/pkg/runtime/stack.c index 96ba515c68..a07042111e 100644 --- a/src/pkg/runtime/stack.c +++ b/src/pkg/runtime/stack.c @@ -21,163 +21,76 @@ enum StackDebug = 0, StackFromSystem = 0, // allocate stacks from system memory instead of the heap StackFaultOnFree = 0, // old stacks are mapped noaccess to detect use after free - - StackCache = 1, }; -// Global pool of spans that have free stacks. -// Stacks are assigned an order according to size. -// order = log_2(size/FixedStack) -// There is a free list for each order. -static MSpan stackpool[NumStackOrders]; -static Lock stackpoolmu; -// TODO: one lock per order? - -void -runtime·stackinit(void) -{ - int32 i; - - for(i = 0; i < NumStackOrders; i++) - runtime·MSpanList_Init(&stackpool[i]); -} - -// Allocates a stack from the free pool. Must be called with -// stackpoolmu held. -static MLink* -poolalloc(uint8 order) +typedef struct StackCacheNode StackCacheNode; +struct StackCacheNode { - MSpan *list; - MSpan *s; - MLink *x; - uintptr i; + StackCacheNode *next; + void* batch[StackCacheBatch-1]; +}; - list = &stackpool[order]; - s = list->next; - if(s == list) { - // no free stacks. Allocate another span worth. - s = runtime·MHeap_AllocStack(&runtime·mheap, StackCacheSize >> PageShift); - if(s == nil) - runtime·throw("out of memory"); - for(i = 0; i < StackCacheSize; i += FixedStack << order) { - x = (MLink*)((s->start << PageShift) + i); - x->next = s->freelist; - s->freelist = x; - } - } - x = s->freelist; - s->freelist = x->next; - s->ref--; - if(s->ref == 0) { - // all stacks in s are allocated. - runtime·MSpanList_Remove(s); - } - return x; -} +static StackCacheNode *stackcache; +static Lock stackcachemu; -// Adds stack x to the free pool. Must be called with stackpoolmu held. +// stackcacherefill/stackcacherelease implement a global cache of stack segments. +// The cache is required to prevent unlimited growth of per-thread caches. static void -poolfree(MLink *x, uint8 order) +stackcacherefill(void) { - MSpan *s; + StackCacheNode *n; + int32 i, pos; - s = runtime·MHeap_Lookup(&runtime·mheap, x); - x->next = s->freelist; - s->freelist = x; - if(s->ref == 0) { - // s now has a free stack - runtime·MSpanList_Insert(&stackpool[order], s); - } - s->ref++; - if(s->ref == (StackCacheSize / FixedStack) >> order) { - // span is completely free - return to heap - runtime·MSpanList_Remove(s); - runtime·MHeap_FreeStack(&runtime·mheap, s); + runtime·lock(&stackcachemu); + n = stackcache; + if(n) + stackcache = n->next; + runtime·unlock(&stackcachemu); + if(n == nil) { + n = (StackCacheNode*)runtime·SysAlloc(FixedStack*StackCacheBatch, &mstats.stacks_sys); + if(n == nil) + runtime·throw("out of memory (stackcacherefill)"); + for(i = 0; i < StackCacheBatch-1; i++) + n->batch[i] = (byte*)n + (i+1)*FixedStack; } -} - -// stackcacherefill/stackcacherelease implement a global pool of stack segments. -// The pool is required to prevent unlimited growth of per-thread caches. -static void -stackcacherefill(MCache *c, uint8 order) -{ - MLink *x, *list; - uintptr size; - - if(StackDebug >= 1) - runtime·printf("stackcacherefill order=%d\n", order); - - // Grab some stacks from the global cache. - // Grab half of the allowed capacity (to prevent thrashing). - list = nil; - size = 0; - runtime·lock(&stackpoolmu); - while(size < StackCacheSize/2) { - x = poolalloc(order); - x->next = list; - list = x; - size += FixedStack << order; + pos = g->m->stackcachepos; + for(i = 0; i < StackCacheBatch-1; i++) { + g->m->stackcache[pos] = n->batch[i]; + pos = (pos + 1) % StackCacheSize; } - runtime·unlock(&stackpoolmu); - - c->stackcache[order].list = list; - c->stackcache[order].size = size; + g->m->stackcache[pos] = n; + pos = (pos + 1) % StackCacheSize; + g->m->stackcachepos = pos; + g->m->stackcachecnt += StackCacheBatch; } static void -stackcacherelease(MCache *c, uint8 order) +stackcacherelease(void) { - MLink *x, *y; - uintptr size; + StackCacheNode *n; + uint32 i, pos; - if(StackDebug >= 1) - runtime·printf("stackcacherelease order=%d\n", order); - x = c->stackcache[order].list; - size = c->stackcache[order].size; - runtime·lock(&stackpoolmu); - while(size > StackCacheSize/2) { - y = x->next; - poolfree(x, order); - x = y; - size -= FixedStack << order; + pos = (g->m->stackcachepos - g->m->stackcachecnt) % StackCacheSize; + n = (StackCacheNode*)g->m->stackcache[pos]; + pos = (pos + 1) % StackCacheSize; + for(i = 0; i < StackCacheBatch-1; i++) { + n->batch[i] = g->m->stackcache[pos]; + pos = (pos + 1) % StackCacheSize; } - runtime·unlock(&stackpoolmu); - c->stackcache[order].list = x; - c->stackcache[order].size = size; -} - -void -runtime·stackcache_clear(MCache *c) -{ - uint8 order; - MLink *x, *y; - - if(StackDebug >= 1) - runtime·printf("stackcache clear\n"); - runtime·lock(&stackpoolmu); - for(order = 0; order < NumStackOrders; order++) { - x = c->stackcache[order].list; - while(x != nil) { - y = x->next; - poolfree(x, order); - x = y; - } - c->stackcache[order].list = nil; - c->stackcache[order].size = 0; - } - runtime·unlock(&stackpoolmu); + g->m->stackcachecnt -= StackCacheBatch; + runtime·lock(&stackcachemu); + n->next = stackcache; + stackcache = n; + runtime·unlock(&stackcachemu); } void* runtime·stackalloc(G *gp, uint32 n) { - uint8 order; - uint32 n2; + uint32 pos; void *v; + bool malloced; Stktop *top; - MLink *x; - MSpan *s; - MCache *c; // Stackalloc must be called on scheduler stack, so that we // never try to grow the stack during the code that stackalloc runs. @@ -197,58 +110,41 @@ runtime·stackalloc(G *gp, uint32 n) return v; } - // Small stacks are allocated with a fixed-size free-list allocator. - // If we need a stack of a bigger size, we fall back on allocating - // a dedicated span. - if(StackCache && n < FixedStack << NumStackOrders) { - order = 0; - n2 = n; - while(n2 > FixedStack) { - order++; - n2 >>= 1; + // Minimum-sized stacks are allocated with a fixed-size free-list allocator, + // but if we need a stack of a bigger size, we fall back on malloc + // (assuming that inside malloc all the stack frames are small, + // so that we do not deadlock). + malloced = true; + if(n == FixedStack || g->m->mallocing) { + if(n != FixedStack) { + runtime·printf("stackalloc: in malloc, size=%d want %d\n", FixedStack, n); + runtime·throw("stackalloc"); } - c = g->m->mcache; - if(c == nil) { - // This can happen in the guts of exitsyscall or - // procresize. Just get a stack from the global pool. - runtime·lock(&stackpoolmu); - x = poolalloc(order); - runtime·unlock(&stackpoolmu); - } else { - x = c->stackcache[order].list; - if(x == nil) { - stackcacherefill(c, order); - x = c->stackcache[order].list; - } - c->stackcache[order].list = x->next; - c->stackcache[order].size -= n; - } - v = (byte*)x; - } else { - s = runtime·MHeap_AllocStack(&runtime·mheap, (n+PageSize-1) >> PageShift); - if(s == nil) - runtime·throw("out of memory"); - v = (byte*)(s->start<<PageShift); - } + if(g->m->stackcachecnt == 0) + stackcacherefill(); + pos = g->m->stackcachepos; + pos = (pos - 1) % StackCacheSize; + v = g->m->stackcache[pos]; + g->m->stackcachepos = pos; + g->m->stackcachecnt--; + g->m->stackinuse++; + malloced = false; + } else + v = runtime·mallocgc(n, 0, FlagNoProfiling|FlagNoGC|FlagNoZero|FlagNoInvokeGC); + top = (Stktop*)((byte*)v+n-sizeof(Stktop)); runtime·memclr((byte*)top, sizeof(*top)); - if(StackDebug >= 1) - runtime·printf(" allocated %p\n", v); + top->malloced = malloced; return v; } void runtime·stackfree(G *gp, void *v, Stktop *top) { - uint8 order; - uintptr n, n2; - MSpan *s; - MLink *x; - MCache *c; + uint32 pos; + uintptr n; n = (uintptr)(top+1) - (uintptr)v; - if(n & (n-1)) - runtime·throw("stack not a power of 2"); if(StackDebug >= 1) runtime·printf("stackfree %p %d\n", v, (int32)n); gp->stacksize -= n; @@ -259,34 +155,19 @@ runtime·stackfree(G *gp, void *v, Stktop *top) runtime·SysFree(v, n, &mstats.stacks_sys); return; } - if(StackCache && n < FixedStack << NumStackOrders) { - order = 0; - n2 = n; - while(n2 > FixedStack) { - order++; - n2 >>= 1; - } - x = (MLink*)v; - c = g->m->mcache; - if(c == nil) { - runtime·lock(&stackpoolmu); - poolfree(x, order); - runtime·unlock(&stackpoolmu); - } else { - if(c->stackcache[order].size >= StackCacheSize) - stackcacherelease(c, order); - x->next = c->stackcache[order].list; - c->stackcache[order].list = x; - c->stackcache[order].size += n; - } - } else { - s = runtime·MHeap_Lookup(&runtime·mheap, v); - if(s->state != MSpanStack) { - runtime·printf("%p %p\n", s->start<<PageShift, v); - runtime·throw("bad span state"); - } - runtime·MHeap_FreeStack(&runtime·mheap, s); + if(top->malloced) { + runtime·free(v); + return; } + if(n != FixedStack) + runtime·throw("stackfree: bad fixed size"); + if(g->m->stackcachecnt == StackCacheSize) + stackcacherelease(); + pos = g->m->stackcachepos; + g->m->stackcache[pos] = v; + g->m->stackcachepos = (pos + 1) % StackCacheSize; + g->m->stackcachecnt++; + g->m->stackinuse--; } // Called from runtime·lessstack when returning from a function which @@ -718,6 +599,7 @@ copystack(G *gp, uintptr nframes, uintptr newsize) uintptr oldsize, used; AdjustInfo adjinfo; Stktop *oldtop, *newtop; + bool malloced; if(gp->syscallstack != 0) runtime·throw("can't handle stack copy in syscall yet"); @@ -731,9 +613,10 @@ copystack(G *gp, uintptr nframes, uintptr newsize) newstk = runtime·stackalloc(gp, newsize); newbase = newstk + newsize; newtop = (Stktop*)(newbase - sizeof(Stktop)); + malloced = newtop->malloced; if(StackDebug >= 1) - runtime·printf("copystack gp=%p [%p %p]/%d -> [%p %p]/%d\n", gp, oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize); + runtime·printf("copystack [%p %p]/%d -> [%p %p]/%d\n", oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize); USED(oldsize); // adjust pointers in the to-be-copied frames @@ -748,6 +631,7 @@ copystack(G *gp, uintptr nframes, uintptr newsize) // copy the stack (including Stktop) to the new location runtime·memmove(newbase - used, oldbase - used, used); + newtop->malloced = malloced; // Swap out old stack for new one gp->stackbase = (uintptr)newtop; @@ -908,7 +792,7 @@ runtime·newstack(void) top = (Stktop*)(stk+framesize-sizeof(*top)); if(StackDebug >= 1) { - runtime·printf("\t-> new stack gp=%p [%p, %p]\n", gp, stk, top); + runtime·printf("\t-> new stack [%p, %p]\n", stk, top); } top->stackbase = gp->stackbase; @@ -997,6 +881,7 @@ runtime·shrinkstack(G *gp) int32 nframes; byte *oldstk, *oldbase; uintptr used, oldsize, newsize; + MSpan *span; if(!runtime·copystack) return; @@ -1010,14 +895,53 @@ runtime·shrinkstack(G *gp) if(used >= oldsize / 4) return; // still using at least 1/4 of the segment. - if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case? - return; + // To shrink to less than 1/2 a page, we need to copy. + if(newsize < PageSize/2) { + if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case? + return; #ifdef GOOS_windows - if(gp->m != nil && gp->m->libcallsp != 0) - return; + if(gp->m != nil && gp->m->libcallsp != 0) + return; #endif - nframes = copyabletopsegment(gp); - if(nframes == -1) + nframes = copyabletopsegment(gp); + if(nframes == -1) + return; + copystack(gp, nframes, newsize); return; - copystack(gp, nframes, newsize); + } + + // To shrink a stack of one page size or more, we can shrink it + // without copying. Just deallocate the lower half. + span = runtime·MHeap_LookupMaybe(&runtime·mheap, oldstk); + if(span == nil) + return; // stack allocated outside heap. Can't shrink it. Can happen if stack is allocated while inside malloc. TODO: shrink by copying? + if(span->elemsize != oldsize) + runtime·throw("span element size doesn't match stack size"); + if((uintptr)oldstk != span->start << PageShift) + runtime·throw("stack not at start of span"); + + if(StackDebug) + runtime·printf("shrinking stack in place %p %X->%X\n", oldstk, oldsize, newsize); + + // new stack guard for smaller stack + gp->stackguard = (uintptr)oldstk + newsize + StackGuard; + gp->stackguard0 = (uintptr)oldstk + newsize + StackGuard; + if(gp->stack0 == (uintptr)oldstk) + gp->stack0 = (uintptr)oldstk + newsize; + gp->stacksize -= oldsize - newsize; + + // Free bottom half of the stack. + if(runtime·debug.efence || StackFromSystem) { + if(runtime·debug.efence || StackFaultOnFree) + runtime·SysFault(oldstk, newsize); + else + runtime·SysFree(oldstk, newsize, &mstats.stacks_sys); + return; + } + // First, we trick malloc into thinking + // we allocated the stack as two separate half-size allocs. Then the + // free() call does the rest of the work for us. + runtime·MSpan_EnsureSwept(span); + runtime·MHeap_SplitSpan(&runtime·mheap, span); + runtime·free(oldstk); } |
