aboutsummaryrefslogtreecommitdiff
path: root/src/pkg/runtime/stack.c
diff options
context:
space:
mode:
authorKeith Randall <khr@golang.org>2014-07-17 14:41:46 -0700
committerKeith Randall <khr@golang.org>2014-07-17 14:41:46 -0700
commitf378f300345431204d5842751db2add7994d9957 (patch)
treedc343a417545556013848a33fd49797d2197582e /src/pkg/runtime/stack.c
parent6b2aabeecc1db46d030b9c5c5553c4e0fabba0cf (diff)
downloadgo-f378f300345431204d5842751db2add7994d9957.tar.xz
undo CL 101570044 / 2c57aaea79c4
redo stack allocation. This is mostly the same as the original CL with a few bug fixes. 1. add racemalloc() for stack allocations 2. fix poolalloc/poolfree to terminate free lists correctly. 3. adjust span ref count correctly. 4. don't use cache for sizes >= StackCacheSize. Should fix bugs and memory leaks in original changelist. ««« original CL description undo CL 104200047 / 318b04f28372 Breaks windows and race detector. TBR=rsc ««« original CL description runtime: stack allocator, separate from mallocgc In order to move malloc to Go, we need to have a separate stack allocator. If we run out of stack during malloc, malloc will not be available to allocate a new stack. Stacks are the last remaining FlagNoGC objects in the GC heap. Once they are out, we can get rid of the distinction between the allocated/blockboundary bits. (This will be in a separate change.) Fixes #7468 Fixes #7424 LGTM=rsc, dvyukov R=golang-codereviews, dvyukov, khr, dave, rsc CC=golang-codereviews https://golang.org/cl/104200047 »»» TBR=rsc CC=golang-codereviews https://golang.org/cl/101570044 »»» LGTM=dvyukov R=dvyukov, dave, khr, alex.brainman CC=golang-codereviews https://golang.org/cl/112240044
Diffstat (limited to 'src/pkg/runtime/stack.c')
-rw-r--r--src/pkg/runtime/stack.c377
1 files changed, 241 insertions, 136 deletions
diff --git a/src/pkg/runtime/stack.c b/src/pkg/runtime/stack.c
index a07042111e..1a502bd6e7 100644
--- a/src/pkg/runtime/stack.c
+++ b/src/pkg/runtime/stack.c
@@ -9,6 +9,7 @@
#include "funcdata.h"
#include "typekind.h"
#include "type.h"
+#include "race.h"
#include "../../cmd/ld/textflag.h"
enum
@@ -21,76 +22,176 @@ enum
StackDebug = 0,
StackFromSystem = 0, // allocate stacks from system memory instead of the heap
StackFaultOnFree = 0, // old stacks are mapped noaccess to detect use after free
+
+ StackCache = 1,
};
-typedef struct StackCacheNode StackCacheNode;
-struct StackCacheNode
+// Global pool of spans that have free stacks.
+// Stacks are assigned an order according to size.
+// order = log_2(size/FixedStack)
+// There is a free list for each order.
+static MSpan stackpool[NumStackOrders];
+static Lock stackpoolmu;
+// TODO: one lock per order?
+
+void
+runtime·stackinit(void)
{
- StackCacheNode *next;
- void* batch[StackCacheBatch-1];
-};
+ int32 i;
-static StackCacheNode *stackcache;
-static Lock stackcachemu;
+ if((StackCacheSize & PageMask) != 0)
+ runtime·throw("cache size must be a multiple of page size");
+
+ for(i = 0; i < NumStackOrders; i++)
+ runtime·MSpanList_Init(&stackpool[i]);
+}
+
+// Allocates a stack from the free pool. Must be called with
+// stackpoolmu held.
+static MLink*
+poolalloc(uint8 order)
+{
+ MSpan *list;
+ MSpan *s;
+ MLink *x;
+ uintptr i;
-// stackcacherefill/stackcacherelease implement a global cache of stack segments.
-// The cache is required to prevent unlimited growth of per-thread caches.
+ list = &stackpool[order];
+ s = list->next;
+ if(s == list) {
+ // no free stacks. Allocate another span worth.
+ s = runtime·MHeap_AllocStack(&runtime·mheap, StackCacheSize >> PageShift);
+ if(s == nil)
+ runtime·throw("out of memory");
+ if(s->ref != 0)
+ runtime·throw("bad ref");
+ if(s->freelist != nil)
+ runtime·throw("bad freelist");
+ for(i = 0; i < StackCacheSize; i += FixedStack << order) {
+ x = (MLink*)((s->start << PageShift) + i);
+ x->next = s->freelist;
+ s->freelist = x;
+ }
+ runtime·MSpanList_Insert(list, s);
+ }
+ x = s->freelist;
+ if(x == nil)
+ runtime·throw("span has no free stacks");
+ s->freelist = x->next;
+ s->ref++;
+ if(s->freelist == nil) {
+ // all stacks in s are allocated.
+ runtime·MSpanList_Remove(s);
+ }
+ return x;
+}
+
+// Adds stack x to the free pool. Must be called with stackpoolmu held.
static void
-stackcacherefill(void)
+poolfree(MLink *x, uint8 order)
{
- StackCacheNode *n;
- int32 i, pos;
+ MSpan *s;
- runtime·lock(&stackcachemu);
- n = stackcache;
- if(n)
- stackcache = n->next;
- runtime·unlock(&stackcachemu);
- if(n == nil) {
- n = (StackCacheNode*)runtime·SysAlloc(FixedStack*StackCacheBatch, &mstats.stacks_sys);
- if(n == nil)
- runtime·throw("out of memory (stackcacherefill)");
- for(i = 0; i < StackCacheBatch-1; i++)
- n->batch[i] = (byte*)n + (i+1)*FixedStack;
+ s = runtime·MHeap_Lookup(&runtime·mheap, x);
+ if(s->state != MSpanStack)
+ runtime·throw("freeing stack not in a stack span");
+ if(s->freelist == nil) {
+ // s will now have a free stack
+ runtime·MSpanList_Insert(&stackpool[order], s);
+ }
+ x->next = s->freelist;
+ s->freelist = x;
+ s->ref--;
+ if(s->ref == 0) {
+ // span is completely free - return to heap
+ runtime·MSpanList_Remove(s);
+ s->freelist = nil;
+ runtime·MHeap_FreeStack(&runtime·mheap, s);
}
- pos = g->m->stackcachepos;
- for(i = 0; i < StackCacheBatch-1; i++) {
- g->m->stackcache[pos] = n->batch[i];
- pos = (pos + 1) % StackCacheSize;
+}
+
+// stackcacherefill/stackcacherelease implement a global pool of stack segments.
+// The pool is required to prevent unlimited growth of per-thread caches.
+static void
+stackcacherefill(MCache *c, uint8 order)
+{
+ MLink *x, *list;
+ uintptr size;
+
+ if(StackDebug >= 1)
+ runtime·printf("stackcacherefill order=%d\n", order);
+
+ // Grab some stacks from the global cache.
+ // Grab half of the allowed capacity (to prevent thrashing).
+ list = nil;
+ size = 0;
+ runtime·lock(&stackpoolmu);
+ while(size < StackCacheSize/2) {
+ x = poolalloc(order);
+ x->next = list;
+ list = x;
+ size += FixedStack << order;
}
- g->m->stackcache[pos] = n;
- pos = (pos + 1) % StackCacheSize;
- g->m->stackcachepos = pos;
- g->m->stackcachecnt += StackCacheBatch;
+ runtime·unlock(&stackpoolmu);
+
+ c->stackcache[order].list = list;
+ c->stackcache[order].size = size;
}
static void
-stackcacherelease(void)
+stackcacherelease(MCache *c, uint8 order)
{
- StackCacheNode *n;
- uint32 i, pos;
+ MLink *x, *y;
+ uintptr size;
- pos = (g->m->stackcachepos - g->m->stackcachecnt) % StackCacheSize;
- n = (StackCacheNode*)g->m->stackcache[pos];
- pos = (pos + 1) % StackCacheSize;
- for(i = 0; i < StackCacheBatch-1; i++) {
- n->batch[i] = g->m->stackcache[pos];
- pos = (pos + 1) % StackCacheSize;
+ if(StackDebug >= 1)
+ runtime·printf("stackcacherelease order=%d\n", order);
+ x = c->stackcache[order].list;
+ size = c->stackcache[order].size;
+ runtime·lock(&stackpoolmu);
+ while(size > StackCacheSize/2) {
+ y = x->next;
+ poolfree(x, order);
+ x = y;
+ size -= FixedStack << order;
}
- g->m->stackcachecnt -= StackCacheBatch;
- runtime·lock(&stackcachemu);
- n->next = stackcache;
- stackcache = n;
- runtime·unlock(&stackcachemu);
+ runtime·unlock(&stackpoolmu);
+ c->stackcache[order].list = x;
+ c->stackcache[order].size = size;
+}
+
+void
+runtime·stackcache_clear(MCache *c)
+{
+ uint8 order;
+ MLink *x, *y;
+
+ if(StackDebug >= 1)
+ runtime·printf("stackcache clear\n");
+ runtime·lock(&stackpoolmu);
+ for(order = 0; order < NumStackOrders; order++) {
+ x = c->stackcache[order].list;
+ while(x != nil) {
+ y = x->next;
+ poolfree(x, order);
+ x = y;
+ }
+ c->stackcache[order].list = nil;
+ c->stackcache[order].size = 0;
+ }
+ runtime·unlock(&stackpoolmu);
}
void*
runtime·stackalloc(G *gp, uint32 n)
{
- uint32 pos;
+ uint8 order;
+ uint32 n2;
void *v;
- bool malloced;
Stktop *top;
+ MLink *x;
+ MSpan *s;
+ MCache *c;
// Stackalloc must be called on scheduler stack, so that we
// never try to grow the stack during the code that stackalloc runs.
@@ -110,41 +211,60 @@ runtime·stackalloc(G *gp, uint32 n)
return v;
}
- // Minimum-sized stacks are allocated with a fixed-size free-list allocator,
- // but if we need a stack of a bigger size, we fall back on malloc
- // (assuming that inside malloc all the stack frames are small,
- // so that we do not deadlock).
- malloced = true;
- if(n == FixedStack || g->m->mallocing) {
- if(n != FixedStack) {
- runtime·printf("stackalloc: in malloc, size=%d want %d\n", FixedStack, n);
- runtime·throw("stackalloc");
+ // Small stacks are allocated with a fixed-size free-list allocator.
+ // If we need a stack of a bigger size, we fall back on allocating
+ // a dedicated span.
+ if(StackCache && n < FixedStack << NumStackOrders && n < StackCacheSize) {
+ order = 0;
+ n2 = n;
+ while(n2 > FixedStack) {
+ order++;
+ n2 >>= 1;
}
- if(g->m->stackcachecnt == 0)
- stackcacherefill();
- pos = g->m->stackcachepos;
- pos = (pos - 1) % StackCacheSize;
- v = g->m->stackcache[pos];
- g->m->stackcachepos = pos;
- g->m->stackcachecnt--;
- g->m->stackinuse++;
- malloced = false;
- } else
- v = runtime·mallocgc(n, 0, FlagNoProfiling|FlagNoGC|FlagNoZero|FlagNoInvokeGC);
-
+ c = g->m->mcache;
+ if(c == nil) {
+ // This can happen in the guts of exitsyscall or
+ // procresize. Just get a stack from the global pool.
+ runtime·lock(&stackpoolmu);
+ x = poolalloc(order);
+ runtime·unlock(&stackpoolmu);
+ } else {
+ x = c->stackcache[order].list;
+ if(x == nil) {
+ stackcacherefill(c, order);
+ x = c->stackcache[order].list;
+ }
+ c->stackcache[order].list = x->next;
+ c->stackcache[order].size -= n;
+ }
+ v = (byte*)x;
+ } else {
+ s = runtime·MHeap_AllocStack(&runtime·mheap, ROUND(n, PageSize) >> PageShift);
+ if(s == nil)
+ runtime·throw("out of memory");
+ v = (byte*)(s->start<<PageShift);
+ }
top = (Stktop*)((byte*)v+n-sizeof(Stktop));
runtime·memclr((byte*)top, sizeof(*top));
- top->malloced = malloced;
+ if(raceenabled)
+ runtime·racemalloc(v, n);
+ if(StackDebug >= 1)
+ runtime·printf(" allocated %p\n", v);
return v;
}
void
runtime·stackfree(G *gp, void *v, Stktop *top)
{
- uint32 pos;
- uintptr n;
-
+ uint8 order;
+ uintptr n, n2;
+ MSpan *s;
+ MLink *x;
+ MCache *c;
+
n = (uintptr)(top+1) - (uintptr)v;
+ if(n & (n-1))
+ runtime·throw("stack not a power of 2");
if(StackDebug >= 1)
runtime·printf("stackfree %p %d\n", v, (int32)n);
gp->stacksize -= n;
@@ -155,19 +275,34 @@ runtime·stackfree(G *gp, void *v, Stktop *top)
runtime·SysFree(v, n, &mstats.stacks_sys);
return;
}
- if(top->malloced) {
- runtime·free(v);
- return;
+ if(StackCache && n < FixedStack << NumStackOrders && n < StackCacheSize) {
+ order = 0;
+ n2 = n;
+ while(n2 > FixedStack) {
+ order++;
+ n2 >>= 1;
+ }
+ x = (MLink*)v;
+ c = g->m->mcache;
+ if(c == nil) {
+ runtime·lock(&stackpoolmu);
+ poolfree(x, order);
+ runtime·unlock(&stackpoolmu);
+ } else {
+ if(c->stackcache[order].size >= StackCacheSize)
+ stackcacherelease(c, order);
+ x->next = c->stackcache[order].list;
+ c->stackcache[order].list = x;
+ c->stackcache[order].size += n;
+ }
+ } else {
+ s = runtime·MHeap_Lookup(&runtime·mheap, v);
+ if(s->state != MSpanStack) {
+ runtime·printf("%p %p\n", s->start<<PageShift, v);
+ runtime·throw("bad span state");
+ }
+ runtime·MHeap_FreeStack(&runtime·mheap, s);
}
- if(n != FixedStack)
- runtime·throw("stackfree: bad fixed size");
- if(g->m->stackcachecnt == StackCacheSize)
- stackcacherelease();
- pos = g->m->stackcachepos;
- g->m->stackcache[pos] = v;
- g->m->stackcachepos = (pos + 1) % StackCacheSize;
- g->m->stackcachecnt++;
- g->m->stackinuse--;
}
// Called from runtime·lessstack when returning from a function which
@@ -186,6 +321,8 @@ runtime·oldstack(void)
gp = g->m->curg;
top = (Stktop*)gp->stackbase;
+ if(top == nil)
+ runtime·throw("nil stackbase");
old = (byte*)gp->stackguard - StackGuard;
sp = (byte*)top;
argsize = top->argsize;
@@ -324,6 +461,8 @@ copyabletopsegment(G *gp)
FuncVal *fn;
StackMap *stackmap;
+ if(gp->stackbase == 0)
+ runtime·throw("stackbase == 0");
cinfo.stk = (byte*)gp->stackguard - StackGuard;
cinfo.base = (byte*)gp->stackbase + sizeof(Stktop);
cinfo.frames = 0;
@@ -599,11 +738,12 @@ copystack(G *gp, uintptr nframes, uintptr newsize)
uintptr oldsize, used;
AdjustInfo adjinfo;
Stktop *oldtop, *newtop;
- bool malloced;
if(gp->syscallstack != 0)
runtime·throw("can't handle stack copy in syscall yet");
oldstk = (byte*)gp->stackguard - StackGuard;
+ if(gp->stackbase == 0)
+ runtime·throw("nil stackbase");
oldbase = (byte*)gp->stackbase + sizeof(Stktop);
oldsize = oldbase - oldstk;
used = oldbase - (byte*)gp->sched.sp;
@@ -613,10 +753,9 @@ copystack(G *gp, uintptr nframes, uintptr newsize)
newstk = runtime·stackalloc(gp, newsize);
newbase = newstk + newsize;
newtop = (Stktop*)(newbase - sizeof(Stktop));
- malloced = newtop->malloced;
if(StackDebug >= 1)
- runtime·printf("copystack [%p %p]/%d -> [%p %p]/%d\n", oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize);
+ runtime·printf("copystack gp=%p [%p %p]/%d -> [%p %p]/%d\n", gp, oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize);
USED(oldsize);
// adjust pointers in the to-be-copied frames
@@ -631,7 +770,6 @@ copystack(G *gp, uintptr nframes, uintptr newsize)
// copy the stack (including Stktop) to the new location
runtime·memmove(newbase - used, oldbase - used, used);
- newtop->malloced = malloced;
// Swap out old stack for new one
gp->stackbase = (uintptr)newtop;
@@ -707,6 +845,8 @@ runtime·newstack(void)
if(!newstackcall)
runtime·rewindmorestack(&gp->sched);
+ if(gp->stackbase == 0)
+ runtime·throw("nil stackbase");
sp = gp->sched.sp;
if(thechar == '6' || thechar == '8') {
// The call to morestack cost a word.
@@ -792,7 +932,7 @@ runtime·newstack(void)
top = (Stktop*)(stk+framesize-sizeof(*top));
if(StackDebug >= 1) {
- runtime·printf("\t-> new stack [%p, %p]\n", stk, top);
+ runtime·printf("\t-> new stack gp=%p [%p, %p]\n", gp, stk, top);
}
top->stackbase = gp->stackbase;
@@ -881,10 +1021,14 @@ runtime·shrinkstack(G *gp)
int32 nframes;
byte *oldstk, *oldbase;
uintptr used, oldsize, newsize;
- MSpan *span;
if(!runtime·copystack)
return;
+ if(gp->status == Gdead)
+ return;
+ if(gp->stackbase == 0)
+ runtime·throw("stackbase == 0");
+ //return; // TODO: why does this happen?
oldstk = (byte*)gp->stackguard - StackGuard;
oldbase = (byte*)gp->stackbase + sizeof(Stktop);
oldsize = oldbase - oldstk;
@@ -895,53 +1039,14 @@ runtime·shrinkstack(G *gp)
if(used >= oldsize / 4)
return; // still using at least 1/4 of the segment.
- // To shrink to less than 1/2 a page, we need to copy.
- if(newsize < PageSize/2) {
- if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case?
- return;
+ if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case?
+ return;
#ifdef GOOS_windows
- if(gp->m != nil && gp->m->libcallsp != 0)
- return;
-#endif
- nframes = copyabletopsegment(gp);
- if(nframes == -1)
- return;
- copystack(gp, nframes, newsize);
+ if(gp->m != nil && gp->m->libcallsp != 0)
return;
- }
-
- // To shrink a stack of one page size or more, we can shrink it
- // without copying. Just deallocate the lower half.
- span = runtime·MHeap_LookupMaybe(&runtime·mheap, oldstk);
- if(span == nil)
- return; // stack allocated outside heap. Can't shrink it. Can happen if stack is allocated while inside malloc. TODO: shrink by copying?
- if(span->elemsize != oldsize)
- runtime·throw("span element size doesn't match stack size");
- if((uintptr)oldstk != span->start << PageShift)
- runtime·throw("stack not at start of span");
-
- if(StackDebug)
- runtime·printf("shrinking stack in place %p %X->%X\n", oldstk, oldsize, newsize);
-
- // new stack guard for smaller stack
- gp->stackguard = (uintptr)oldstk + newsize + StackGuard;
- gp->stackguard0 = (uintptr)oldstk + newsize + StackGuard;
- if(gp->stack0 == (uintptr)oldstk)
- gp->stack0 = (uintptr)oldstk + newsize;
- gp->stacksize -= oldsize - newsize;
-
- // Free bottom half of the stack.
- if(runtime·debug.efence || StackFromSystem) {
- if(runtime·debug.efence || StackFaultOnFree)
- runtime·SysFault(oldstk, newsize);
- else
- runtime·SysFree(oldstk, newsize, &mstats.stacks_sys);
+#endif
+ nframes = copyabletopsegment(gp);
+ if(nframes == -1)
return;
- }
- // First, we trick malloc into thinking
- // we allocated the stack as two separate half-size allocs. Then the
- // free() call does the rest of the work for us.
- runtime·MSpan_EnsureSwept(span);
- runtime·MHeap_SplitSpan(&runtime·mheap, span);
- runtime·free(oldstk);
+ copystack(gp, nframes, newsize);
}