diff options
| author | Russ Cox <rsc@golang.org> | 2013-02-20 17:48:23 -0500 |
|---|---|---|
| committer | Russ Cox <rsc@golang.org> | 2013-02-20 17:48:23 -0500 |
| commit | 6c976393aea607e67f4d31e3a2ae7b3c0dc15ade (patch) | |
| tree | 7c168ce818eec974c9f28584a6497c616f4d8e6f /src/pkg/runtime/proc.c | |
| parent | 43da336b151993fa3b0d17dc443f5ba9d29d482f (diff) | |
| download | go-6c976393aea607e67f4d31e3a2ae7b3c0dc15ade.tar.xz | |
runtime: allow cgo callbacks on non-Go threads
Fixes #4435.
R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104
Diffstat (limited to 'src/pkg/runtime/proc.c')
| -rw-r--r-- | src/pkg/runtime/proc.c | 229 |
1 files changed, 219 insertions, 10 deletions
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index b83bd9066f..67d6dad488 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -25,6 +25,7 @@ int32 runtime·gcwaiting; G* runtime·allg; G* runtime·lastg; M* runtime·allm; +M* runtime·extram; int8* runtime·goos; int32 runtime·ncpu; @@ -792,8 +793,11 @@ runtime·mstart(void) // Install signal handlers; after minit so that minit can // prepare the thread to be able to handle the signals. - if(m == &runtime·m0) + if(m == &runtime·m0) { runtime·initsig(); + if(runtime·iscgo) + runtime·newextram(); + } schedule(nil); @@ -838,9 +842,9 @@ matchmg(void) } } -// Create a new m. It will start off with a call to runtime·mstart. +// Allocate a new m unassociated with any thread. M* -runtime·newm(void) +runtime·allocm(void) { M *mp; static Type *mtype; // The Go type M @@ -854,23 +858,228 @@ runtime·newm(void) mp = runtime·cnew(mtype); mcommoninit(mp); + if(runtime·iscgo || Windows) + mp->g0 = runtime·malg(-1); + else + mp->g0 = runtime·malg(8192); + + return mp; +} + +static M* lockextra(bool nilokay); +static void unlockextra(M*); + +// needm is called when a cgo callback happens on a +// thread without an m (a thread not created by Go). +// In this case, needm is expected to find an m to use +// and return with m, g initialized correctly. +// Since m and g are not set now (likely nil, but see below) +// needm is limited in what routines it can call. In particular +// it can only call nosplit functions (textflag 7) and cannot +// do any scheduling that requires an m. +// +// In order to avoid needing heavy lifting here, we adopt +// the following strategy: there is a stack of available m's +// that can be stolen. Using compare-and-swap +// to pop from the stack has ABA races, so we simulate +// a lock by doing an exchange (via casp) to steal the stack +// head and replace the top pointer with MLOCKED (1). +// This serves as a simple spin lock that we can use even +// without an m. 
The thread that locks the stack in this way +// unlocks the stack by storing a valid stack head pointer. +// +// In order to make sure that there is always an m structure +// available to be stolen, we maintain the invariant that there +// is always one more than needed. At the beginning of the +// program (if cgo is in use) the list is seeded with a single m. +// If needm finds that it has taken the last m off the list, its job +// is - once it has installed its own m so that it can do things like +// allocate memory - to create a spare m and put it on the list. +// +// Each of these extra m's also has a g0 and a curg that are +// pressed into service as the scheduling stack and current +// goroutine for the duration of the cgo callback. +// +// When the callback is done with the m, it calls dropm to +// put the m back on the list. +#pragma textflag 7 +void +runtime·needm(byte x) +{ + M *mp; + + // Lock extra list, take head, unlock popped list. + // nilokay=false is safe here because of the invariant above, + // that the extra list always contains or will soon contain + // at least one m. + mp = lockextra(false); + + // Set needextram when we've just emptied the list, + // so that the eventual call into cgocallbackg will + // allocate a new m for the extra list. We delay the + // allocation until then so that it can be done + // after exitsyscall makes sure it is okay to be + // running at all (that is, there's no garbage collection + // running right now). + mp->needextram = mp->schedlink == nil; + unlockextra(mp->schedlink); + + // Install m and g (= m->g0) and set the stack bounds + // to match the current stack. We don't actually know + // how big the stack is, like we don't know how big any + // scheduling stack is, but we assume there's at least 32 kB, + // which is more than enough for us. 
+ runtime·setmg(mp, mp->g0); + g->stackbase = (uintptr)(&x + 1024); + g->stackguard = (uintptr)(&x - 32*1024); + + // On windows/386, we need to put an SEH frame (two words) + // somewhere on the current stack. We are called + // from needm, and we know there is some available + // space one word into the argument frame. Use that. + m->seh = (SEH*)((uintptr*)&x + 1); + + // Initialize this thread to use the m. + runtime·asminit(); + runtime·minit(); +} + +// newextram allocates an m and puts it on the extra list. +// It is called with a working local m, so that it can do things +// like call schedlock and allocate. +void +runtime·newextram(void) +{ + M *mp, *mnext; + G *gp; + + // Scheduler protects allocation of new m's and g's. + // Create extra goroutine locked to extra m. + // The goroutine is the context in which the cgo callback will run. + // The sched.pc will never be returned to, but setting it to + // runtime.goexit makes clear to the traceback routines where + // the goroutine stack ends. + schedlock(); + mp = runtime·allocm(); + gp = runtime·malg(4096); + gp->sched.pc = (void*)runtime·goexit; + gp->sched.sp = gp->stackbase; + gp->sched.g = gp; + gp->status = Gsyscall; + mp->curg = gp; + mp->locked = LockInternal; + mp->lockedg = gp; + gp->lockedm = mp; + schedunlock(); + + // Add m to the extra list. + mnext = lockextra(true); + mp->schedlink = mnext; + unlockextra(mp); +} + +// dropm is called when a cgo callback has called needm but is now +// done with the callback and returning back into the non-Go thread. +// It puts the current m back onto the extra list. +// +// The main expense here is the call to signalstack to release the +// m's signal stack, and then the call to needm on the next callback +// from this thread. It is tempting to try to save the m for next time, +// which would eliminate both these costs, but there might not be +// a next time: the current thread (which Go does not control) might exit. 
+// If we saved the m for that thread, there would be an m leak each time +// such a thread exited. Instead, we acquire and release an m on each +// call. These should typically not be scheduling operations, just a few +// atomics, so the cost should be small. +// +// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread +// variable using pthread_key_create. Unlike the pthread keys we already use +// on OS X, this dummy key would never be read by Go code. It would exist +// only so that we could register a thread-exit-time destructor. +// That destructor would put the m back onto the extra list. +// This is purely a performance optimization. The current version, +// in which dropm happens on each cgo call, is still correct too. +// We may have to keep the current version on systems with cgo +// but without pthreads, like Windows. +void +runtime·dropm(void) +{ + M *mp, *mnext; + + // Undo whatever initialization minit did during needm. + runtime·unminit(); + + // Clear m and g, and return m to the extra list. + // After the call to setmg we can only call nosplit functions. + mp = m; + runtime·setmg(nil, nil); + + mnext = lockextra(true); + mp->schedlink = mnext; + unlockextra(mp); +} + +#define MLOCKED ((M*)1) + +// lockextra locks the extra list and returns the list head. +// The caller must unlock the list by storing a new list head +// to runtime.extram. If nilokay is true, then lockextra will +// return a nil list head if that's what it finds. If nilokay is false, +// lockextra will keep waiting until the list head is no longer nil. 
+#pragma textflag 7 +static M* +lockextra(bool nilokay) +{ + M *mp; + void (*yield)(void); + + for(;;) { + mp = runtime·atomicloadp(&runtime·extram); + if(mp == MLOCKED) { + yield = runtime·osyield; + yield(); + continue; + } + if(mp == nil && !nilokay) { + runtime·usleep(1); + continue; + } + if(!runtime·casp(&runtime·extram, mp, MLOCKED)) { + yield = runtime·osyield; + yield(); + continue; + } + break; + } + return mp; +} + +#pragma textflag 7 +static void +unlockextra(M *mp) +{ + runtime·atomicstorep(&runtime·extram, mp); +} + + +// Create a new m. It will start off with a call to runtime·mstart. +M* +runtime·newm(void) +{ + M *mp; + + mp = runtime·allocm(); + if(runtime·iscgo) { CgoThreadStart ts; if(libcgo_thread_start == nil) runtime·throw("libcgo_thread_start missing"); - // pthread_create will make us a stack. - mp->g0 = runtime·malg(-1); ts.m = mp; ts.g = mp->g0; ts.fn = runtime·mstart; runtime·asmcgocall(libcgo_thread_start, &ts); } else { - if(Windows) - // windows will layout sched stack on os stack - mp->g0 = runtime·malg(-1); - else - mp->g0 = runtime·malg(8192); runtime·newosproc(mp, mp->g0, (byte*)mp->g0->stackbase, runtime·mstart); } |
