runtime: allow cgo callbacks on non-Go threads

Fixes #4435. R=golang-dev, iant, alex.brainman, minux.ma, dvyukov CC=golang-dev https://golang.org/cl/7304104
author: Russ Cox <rsc@golang.org> 2013-02-20 17:48:23 -0500
committer: Russ Cox <rsc@golang.org> 2013-02-20 17:48:23 -0500
commit: 6c976393aea607e67f4d31e3a2ae7b3c0dc15ade (patch)
tree: 7c168ce818eec974c9f28584a6497c616f4d8e6f /src/pkg/runtime/proc.c
parent: 43da336b151993fa3b0d17dc443f5ba9d29d482f (diff)
download: go-6c976393aea607e67f4d31e3a2ae7b3c0dc15ade.tar.xz
1 files changed, 219 insertions, 10 deletions
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c
index b83bd9066f..67d6dad488 100644
--- a/src/pkg/runtime/proc.c
+++ b/src/pkg/runtime/proc.c
@@ -25,6 +25,7 @@ int32	runtime·gcwaiting;
 G*	runtime·allg;
 G*	runtime·lastg;
 M*	runtime·allm;
+M*	runtime·extram;
 
 int8*	runtime·goos;
 int32	runtime·ncpu;
@@ -792,8 +793,11 @@ runtime·mstart(void)
 
 	// Install signal handlers; after minit so that minit can
 	// prepare the thread to be able to handle the signals.
-	if(m == &runtime·m0)
+	if(m == &runtime·m0) {
 		runtime·initsig();
+		if(runtime·iscgo)
+			runtime·newextram();
+	}
 
 	schedule(nil);
 
@@ -838,9 +842,9 @@ matchmg(void)
 	}
 }
 
-// Create a new m.  It will start off with a call to runtime·mstart.
+// Allocate a new m unassociated with any thread.
 M*
-runtime·newm(void)
+runtime·allocm(void)
 {
 	M *mp;
 	static Type *mtype;  // The Go type M
@@ -854,23 +858,228 @@ runtime·newm(void)
 	mp = runtime·cnew(mtype);
 	mcommoninit(mp);
 
+	if(runtime·iscgo || Windows)
+		mp->g0 = runtime·malg(-1);
+	else
+		mp->g0 = runtime·malg(8192);
+	
+	return mp;
+}
+
+static M* lockextra(bool nilokay);
+static void unlockextra(M*);
+
+// needm is called when a cgo callback happens on a
+// thread without an m (a thread not created by Go).
+// In this case, needm is expected to find an m to use
+// and return with m, g initialized correctly.
+// Since m and g are not set now (likely nil, but see below)
+// needm is limited in what routines it can call. In particular
+// it can only call nosplit functions (textflag 7) and cannot
+// do any scheduling that requires an m.
+//
+// In order to avoid needing heavy lifting here, we adopt
+// the following strategy: there is a stack of available m's
+// that can be stolen. Using compare-and-swap
+// to pop from the stack has ABA races, so we simulate
+// a lock by doing an exchange (via casp) to steal the stack
+// head and replace the top pointer with MLOCKED (1).
+// This serves as a simple spin lock that we can use even
+// without an m. The thread that locks the stack in this way
+// unlocks the stack by storing a valid stack head pointer.
+//
+// In order to make sure that there is always an m structure
+// available to be stolen, we maintain the invariant that there
+// is always one more than needed. At the beginning of the
+// program (if cgo is in use) the list is seeded with a single m.
+// If needm finds that it has taken the last m off the list, its job
+// is - once it has installed its own m so that it can do things like
+// allocate memory - to create a spare m and put it on the list.
+//
+// Each of these extra m's also has a g0 and a curg that are
+// pressed into service as the scheduling stack and current
+// goroutine for the duration of the cgo callback.
+//
+// When the callback is done with the m, it calls dropm to
+// put the m back on the list.
+#pragma textflag 7
+void
+runtime·needm(byte x)
+{
+	M *mp;
+
+	// Lock extra list, take head, unlock popped list.
+	// nilokay=false is safe here because of the invariant above,
+	// that the extra list always contains or will soon contain
+	// at least one m.
+	mp = lockextra(false);
+
+	// Set needextram when we've just emptied the list,
+	// so that the eventual call into cgocallbackg will
+	// allocate a new m for the extra list. We delay the
+	// allocation until then so that it can be done 
+	// after exitsyscall makes sure it is okay to be
+	// running at all (that is, there's no garbage collection
+	// running right now).	
+	mp->needextram = mp->schedlink == nil;
+	unlockextra(mp->schedlink);
+	
+	// Install m and g (= m->g0) and set the stack bounds
+	// to match the current stack. We don't actually know
+	// how big the stack is, like we don't know how big any
+	// scheduling stack is, but we assume there's at least 32 kB,
+	// which is more than enough for us.
+	runtime·setmg(mp, mp->g0);
+	g->stackbase = (uintptr)(&x + 1024);
+	g->stackguard = (uintptr)(&x - 32*1024);
+
+	// On windows/386, we need to put an SEH frame (two words)
+	// somewhere on the current stack. We are running
+	// in needm, and we know there is some available
+	// space one word into the argument frame. Use that.
+	m->seh = (SEH*)((uintptr*)&x + 1);
+
+	// Initialize this thread to use the m.
+	runtime·asminit();
+	runtime·minit();
+}
+
+// newextram allocates an m and puts it on the extra list.
+// It is called with a working local m, so that it can do things
+// like call schedlock and allocate.
+void
+runtime·newextram(void)
+{
+	M *mp, *mnext;
+	G *gp;
+
+	// Scheduler protects allocation of new m's and g's.
+	// Create extra goroutine locked to extra m.
+	// The goroutine is the context in which the cgo callback will run.
+	// The sched.pc will never be returned to, but setting it to
+	// runtime.goexit makes clear to the traceback routines where
+	// the goroutine stack ends.
+	schedlock();
+	mp = runtime·allocm();
+	gp = runtime·malg(4096);
+	gp->sched.pc = (void*)runtime·goexit;
+	gp->sched.sp = gp->stackbase;
+	gp->sched.g = gp;
+	gp->status = Gsyscall;
+	mp->curg = gp;
+	mp->locked = LockInternal;
+	mp->lockedg = gp;
+	gp->lockedm = mp;
+	schedunlock();
+
+	// Add m to the extra list.
+	mnext = lockextra(true);
+	mp->schedlink = mnext;
+	unlockextra(mp);
+}
+
+// dropm is called when a cgo callback has called needm but is now
+// done with the callback and returning back into the non-Go thread.
+// It puts the current m back onto the extra list.
+//
+// The main expense here is the call to signalstack to release the
+// m's signal stack, and then the call to needm on the next callback
+// from this thread. It is tempting to try to save the m for next time,
+// which would eliminate both these costs, but there might not be 
+// a next time: the current thread (which Go does not control) might exit.
+// If we saved the m for that thread, there would be an m leak each time
+// such a thread exited. Instead, we acquire and release an m on each
+// call. These should typically not be scheduling operations, just a few
+// atomics, so the cost should be small.
+//
+// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
+// variable using pthread_key_create. Unlike the pthread keys we already use
+// on OS X, this dummy key would never be read by Go code. It would exist
+// only so that we could register a thread-exit-time destructor.
+// That destructor would put the m back onto the extra list.
+// This is purely a performance optimization. The current version,
+// in which dropm happens on each cgo call, is still correct too.
+// We may have to keep the current version on systems with cgo
+// but without pthreads, like Windows.
+void
+runtime·dropm(void)
+{
+	M *mp, *mnext;
+
+	// Undo whatever initialization minit did during needm.
+	runtime·unminit();
+
+	// Clear m and g, and return m to the extra list.
+	// After the call to setmg we can only call nosplit functions.
+	mp = m;
+	runtime·setmg(nil, nil);
+
+	mnext = lockextra(true);
+	mp->schedlink = mnext;
+	unlockextra(mp);
+}
+
+#define MLOCKED ((M*)1)
+
+// lockextra locks the extra list and returns the list head.
+// The caller must unlock the list by storing a new list head
+// to runtime.extram. If nilokay is true, then lockextra will
+// return a nil list head if that's what it finds. If nilokay is false,
+// lockextra will keep waiting until the list head is no longer nil.
+#pragma textflag 7
+static M*
+lockextra(bool nilokay)
+{
+	M *mp;
+	void (*yield)(void);
+	
+	for(;;) {
+		mp = runtime·atomicloadp(&runtime·extram);
+		if(mp == MLOCKED) {
+			yield = runtime·osyield;
+			yield();
+			continue;
+		}
+		if(mp == nil && !nilokay) {
+			runtime·usleep(1);
+			continue;
+		}
+		if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
+			yield = runtime·osyield;
+			yield();
+			continue;
+		}
+		break;
+	}
+	return mp;
+}
+
+#pragma textflag 7
+static void
+unlockextra(M *mp)
+{
+	runtime·atomicstorep(&runtime·extram, mp);
+}
+
+
+// Create a new m.  It will start off with a call to runtime·mstart.
+M*
+runtime·newm(void)
+{
+	M *mp;
+	
+	mp = runtime·allocm();
+
 	if(runtime·iscgo) {
 		CgoThreadStart ts;
 
 		if(libcgo_thread_start == nil)
 			runtime·throw("libcgo_thread_start missing");
-		// pthread_create will make us a stack.
-		mp->g0 = runtime·malg(-1);
 		ts.m = mp;
 		ts.g = mp->g0;
 		ts.fn = runtime·mstart;
 		runtime·asmcgocall(libcgo_thread_start, &ts);
 	} else {
-		if(Windows)
-			// windows will layout sched stack on os stack
-			mp->g0 = runtime·malg(-1);
-		else
-			mp->g0 = runtime·malg(8192);
 		runtime·newosproc(mp, mp->g0, (byte*)mp->g0->stackbase, runtime·mstart);
 	}
author	Russ Cox <rsc@golang.org>	2013-02-20 17:48:23 -0500
committer	Russ Cox <rsc@golang.org>	2013-02-20 17:48:23 -0500
commit	6c976393aea607e67f4d31e3a2ae7b3c0dc15ade (patch)
tree	7c168ce818eec974c9f28584a6497c616f4d8e6f /src/pkg/runtime/proc.c
parent	43da336b151993fa3b0d17dc443f5ba9d29d482f (diff)
download	go-6c976393aea607e67f4d31e3a2ae7b3c0dc15ade.tar.xz