aboutsummaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
authorRhys Hiltner <rhys.hiltner@gmail.com>2024-10-28 14:01:54 -0700
committerMichael Knyszek <mknyszek@google.com>2024-11-15 21:15:59 +0000
commit18c2461af38e93ed385e953f3336fcaaca2da727 (patch)
treeca2437f778f0d63947692e38936c3708b416ec8b /src/runtime
parent252e9def65cd6230447fc11046d7f4b176ae2d4b (diff)
downloadgo-18c2461af38e93ed385e953f3336fcaaca2da727.tar.xz
runtime: allow futex OSes to use sema-based mutex
Implement sema{create,sleep,wakeup} in terms of the futex syscall when available. Split the lock2/unlock2 implementations out of lock_sema.go and lock_futex.go (which they shared with runtime.note) to allow swapping in new implementations of those. Let futex-based platforms use the semaphore-based mutex implementation. Control that via the new "spinbitmutex" GOEXPERIMENT value, disabled by default. This lays the groundwork for a "spinbit" mutex implementation; it does not include the new mutex implementation. For #68578. Change-Id: I091289c85124212a87abec7079ecbd9e610b4270 Reviewed-on: https://go-review.googlesource.com/c/go/+/622996 Reviewed-by: Michael Knyszek <mknyszek@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/lock_futex.go153
-rw-r--r--src/runtime/lock_futex_tristate.go136
-rw-r--r--src/runtime/lock_js.go2
-rw-r--r--src/runtime/lock_sema.go125
-rw-r--r--src/runtime/lock_sema_tristate.go148
-rw-r--r--src/runtime/lock_wasip1.go2
-rw-r--r--src/runtime/os_dragonfly.go4
-rw-r--r--src/runtime/os_freebsd.go4
-rw-r--r--src/runtime/os_linux.go2
-rw-r--r--src/runtime/runtime2.go2
10 files changed, 327 insertions, 251 deletions
diff --git a/src/runtime/lock_futex.go b/src/runtime/lock_futex.go
index 58690e45e4..1cf146b43c 100644
--- a/src/runtime/lock_futex.go
+++ b/src/runtime/lock_futex.go
@@ -11,32 +11,6 @@ import (
"unsafe"
)
-// This implementation depends on OS-specific implementations of
-//
-// futexsleep(addr *uint32, val uint32, ns int64)
-// Atomically,
-// if *addr == val { sleep }
-// Might be woken up spuriously; that's allowed.
-// Don't sleep longer than ns; ns < 0 means forever.
-//
-// futexwakeup(addr *uint32, cnt uint32)
-// If any procs are sleeping on addr, wake up at most cnt.
-
-const (
- mutex_unlocked = 0
- mutex_locked = 1
- mutex_sleeping = 2
-
- active_spin = 4
- active_spin_cnt = 30
- passive_spin = 1
-)
-
-// Possible lock states are mutex_unlocked, mutex_locked and mutex_sleeping.
-// mutex_sleeping means that there is presumably at least one sleeping thread.
-// Note that there can be spinning threads during all states - they do not
-// affect mutex's state.
-
// We use the uintptr mutex.key and note.key as a uint32.
//
//go:nosplit
@@ -44,103 +18,6 @@ func key32(p *uintptr) *uint32 {
return (*uint32)(unsafe.Pointer(p))
}
-func mutexContended(l *mutex) bool {
- return atomic.Load(key32(&l.key)) > mutex_locked
-}
-
-func lock(l *mutex) {
- lockWithRank(l, getLockRank(l))
-}
-
-func lock2(l *mutex) {
- gp := getg()
-
- if gp.m.locks < 0 {
- throw("runtime·lock: lock count")
- }
- gp.m.locks++
-
- // Speculative grab for lock.
- v := atomic.Xchg(key32(&l.key), mutex_locked)
- if v == mutex_unlocked {
- return
- }
-
- // wait is either MUTEX_LOCKED or MUTEX_SLEEPING
- // depending on whether there is a thread sleeping
- // on this mutex. If we ever change l->key from
- // MUTEX_SLEEPING to some other value, we must be
- // careful to change it back to MUTEX_SLEEPING before
- // returning, to ensure that the sleeping thread gets
- // its wakeup call.
- wait := v
-
- timer := &lockTimer{lock: l}
- timer.begin()
- // On uniprocessors, no point spinning.
- // On multiprocessors, spin for ACTIVE_SPIN attempts.
- spin := 0
- if ncpu > 1 {
- spin = active_spin
- }
- for {
- // Try for lock, spinning.
- for i := 0; i < spin; i++ {
- for l.key == mutex_unlocked {
- if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
- timer.end()
- return
- }
- }
- procyield(active_spin_cnt)
- }
-
- // Try for lock, rescheduling.
- for i := 0; i < passive_spin; i++ {
- for l.key == mutex_unlocked {
- if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
- timer.end()
- return
- }
- }
- osyield()
- }
-
- // Sleep.
- v = atomic.Xchg(key32(&l.key), mutex_sleeping)
- if v == mutex_unlocked {
- timer.end()
- return
- }
- wait = mutex_sleeping
- futexsleep(key32(&l.key), mutex_sleeping, -1)
- }
-}
-
-func unlock(l *mutex) {
- unlockWithRank(l)
-}
-
-func unlock2(l *mutex) {
- v := atomic.Xchg(key32(&l.key), mutex_unlocked)
- if v == mutex_unlocked {
- throw("unlock of unlocked lock")
- }
- if v == mutex_sleeping {
- futexwakeup(key32(&l.key), 1)
- }
-
- gp := getg()
- gp.m.mLockProfile.recordUnlock(l)
- gp.m.locks--
- if gp.m.locks < 0 {
- throw("runtime·unlock: lock count")
- }
- if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
- gp.stackguard0 = stackPreempt
- }
-}
-
// One-time notifications.
func noteclear(n *note) {
n.key = 0
@@ -254,3 +131,33 @@ func beforeIdle(int64, int64) (*g, bool) {
}
func checkTimeouts() {}
+
+//go:nosplit
+func semacreate(mp *m) {}
+
+//go:nosplit
+func semasleep(ns int64) int32 {
+ mp := getg().m
+
+ for v := atomic.Xadd(&mp.waitsema, -1); ; v = atomic.Load(&mp.waitsema) {
+ if int32(v) >= 0 {
+ return 0
+ }
+ futexsleep(&mp.waitsema, v, ns)
+ if ns >= 0 {
+ if int32(v) >= 0 {
+ return 0
+ } else {
+ return -1
+ }
+ }
+ }
+}
+
+//go:nosplit
+func semawakeup(mp *m) {
+ v := atomic.Xadd(&mp.waitsema, 1)
+ if v == 0 {
+ futexwakeup(&mp.waitsema, 1)
+ }
+}
diff --git a/src/runtime/lock_futex_tristate.go b/src/runtime/lock_futex_tristate.go
new file mode 100644
index 0000000000..dea4323f1e
--- /dev/null
+++ b/src/runtime/lock_futex_tristate.go
@@ -0,0 +1,136 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (dragonfly || freebsd || linux) && !goexperiment.spinbitmutex
+
+package runtime
+
+import (
+ "internal/runtime/atomic"
+)
+
+// This implementation depends on OS-specific implementations of
+//
+// futexsleep(addr *uint32, val uint32, ns int64)
+// Atomically,
+// if *addr == val { sleep }
+// Might be woken up spuriously; that's allowed.
+// Don't sleep longer than ns; ns < 0 means forever.
+//
+// futexwakeup(addr *uint32, cnt uint32)
+// If any procs are sleeping on addr, wake up at most cnt.
+
+const (
+ mutex_unlocked = 0
+ mutex_locked = 1
+ mutex_sleeping = 2
+
+ active_spin = 4
+ active_spin_cnt = 30
+ passive_spin = 1
+)
+
+// Possible lock states are mutex_unlocked, mutex_locked and mutex_sleeping.
+// mutex_sleeping means that there is presumably at least one sleeping thread.
+// Note that there can be spinning threads during all states - they do not
+// affect mutex's state.
+
+type mWaitList struct{}
+
+func mutexContended(l *mutex) bool {
+ return atomic.Load(key32(&l.key)) > mutex_locked
+}
+
+func lock(l *mutex) {
+ lockWithRank(l, getLockRank(l))
+}
+
+func lock2(l *mutex) {
+ gp := getg()
+
+ if gp.m.locks < 0 {
+ throw("runtime·lock: lock count")
+ }
+ gp.m.locks++
+
+ // Speculative grab for lock.
+ v := atomic.Xchg(key32(&l.key), mutex_locked)
+ if v == mutex_unlocked {
+ return
+ }
+
+ // wait is either MUTEX_LOCKED or MUTEX_SLEEPING
+ // depending on whether there is a thread sleeping
+ // on this mutex. If we ever change l->key from
+ // MUTEX_SLEEPING to some other value, we must be
+ // careful to change it back to MUTEX_SLEEPING before
+ // returning, to ensure that the sleeping thread gets
+ // its wakeup call.
+ wait := v
+
+ timer := &lockTimer{lock: l}
+ timer.begin()
+ // On uniprocessors, no point spinning.
+ // On multiprocessors, spin for ACTIVE_SPIN attempts.
+ spin := 0
+ if ncpu > 1 {
+ spin = active_spin
+ }
+ for {
+ // Try for lock, spinning.
+ for i := 0; i < spin; i++ {
+ for l.key == mutex_unlocked {
+ if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
+ timer.end()
+ return
+ }
+ }
+ procyield(active_spin_cnt)
+ }
+
+ // Try for lock, rescheduling.
+ for i := 0; i < passive_spin; i++ {
+ for l.key == mutex_unlocked {
+ if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
+ timer.end()
+ return
+ }
+ }
+ osyield()
+ }
+
+ // Sleep.
+ v = atomic.Xchg(key32(&l.key), mutex_sleeping)
+ if v == mutex_unlocked {
+ timer.end()
+ return
+ }
+ wait = mutex_sleeping
+ futexsleep(key32(&l.key), mutex_sleeping, -1)
+ }
+}
+
+func unlock(l *mutex) {
+ unlockWithRank(l)
+}
+
+func unlock2(l *mutex) {
+ v := atomic.Xchg(key32(&l.key), mutex_unlocked)
+ if v == mutex_unlocked {
+ throw("unlock of unlocked lock")
+ }
+ if v == mutex_sleeping {
+ futexwakeup(key32(&l.key), 1)
+ }
+
+ gp := getg()
+ gp.m.mLockProfile.recordUnlock(l)
+ gp.m.locks--
+ if gp.m.locks < 0 {
+ throw("runtime·unlock: lock count")
+ }
+ if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
+ gp.stackguard0 = stackPreempt
+ }
+}
diff --git a/src/runtime/lock_js.go b/src/runtime/lock_js.go
index e70a881895..bc62c7985d 100644
--- a/src/runtime/lock_js.go
+++ b/src/runtime/lock_js.go
@@ -26,6 +26,8 @@ const (
passive_spin = 1
)
+type mWaitList struct{}
+
func mutexContended(l *mutex) bool {
return false
}
diff --git a/src/runtime/lock_sema.go b/src/runtime/lock_sema.go
index 32d2235ad3..bddb8adea7 100644
--- a/src/runtime/lock_sema.go
+++ b/src/runtime/lock_sema.go
@@ -11,131 +11,6 @@ import (
"unsafe"
)
-// This implementation depends on OS-specific implementations of
-//
-// func semacreate(mp *m)
-// Create a semaphore for mp, if it does not already have one.
-//
-// func semasleep(ns int64) int32
-// If ns < 0, acquire m's semaphore and return 0.
-// If ns >= 0, try to acquire m's semaphore for at most ns nanoseconds.
-// Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
-//
-// func semawakeup(mp *m)
-// Wake up mp, which is or will soon be sleeping on its semaphore.
-const (
- locked uintptr = 1
-
- active_spin = 4
- active_spin_cnt = 30
- passive_spin = 1
-)
-
-func mutexContended(l *mutex) bool {
- return atomic.Loaduintptr(&l.key) > locked
-}
-
-func lock(l *mutex) {
- lockWithRank(l, getLockRank(l))
-}
-
-func lock2(l *mutex) {
- gp := getg()
- if gp.m.locks < 0 {
- throw("runtime·lock: lock count")
- }
- gp.m.locks++
-
- // Speculative grab for lock.
- if atomic.Casuintptr(&l.key, 0, locked) {
- return
- }
- semacreate(gp.m)
-
- timer := &lockTimer{lock: l}
- timer.begin()
- // On uniprocessor's, no point spinning.
- // On multiprocessors, spin for ACTIVE_SPIN attempts.
- spin := 0
- if ncpu > 1 {
- spin = active_spin
- }
-Loop:
- for i := 0; ; i++ {
- v := atomic.Loaduintptr(&l.key)
- if v&locked == 0 {
- // Unlocked. Try to lock.
- if atomic.Casuintptr(&l.key, v, v|locked) {
- timer.end()
- return
- }
- i = 0
- }
- if i < spin {
- procyield(active_spin_cnt)
- } else if i < spin+passive_spin {
- osyield()
- } else {
- // Someone else has it.
- // l->waitm points to a linked list of M's waiting
- // for this lock, chained through m->nextwaitm.
- // Queue this M.
- for {
- gp.m.nextwaitm = muintptr(v &^ locked)
- if atomic.Casuintptr(&l.key, v, uintptr(unsafe.Pointer(gp.m))|locked) {
- break
- }
- v = atomic.Loaduintptr(&l.key)
- if v&locked == 0 {
- continue Loop
- }
- }
- if v&locked != 0 {
- // Queued. Wait.
- semasleep(-1)
- i = 0
- }
- }
- }
-}
-
-func unlock(l *mutex) {
- unlockWithRank(l)
-}
-
-// We might not be holding a p in this code.
-//
-//go:nowritebarrier
-func unlock2(l *mutex) {
- gp := getg()
- var mp *m
- for {
- v := atomic.Loaduintptr(&l.key)
- if v == locked {
- if atomic.Casuintptr(&l.key, locked, 0) {
- break
- }
- } else {
- // Other M's are waiting for the lock.
- // Dequeue an M.
- mp = muintptr(v &^ locked).ptr()
- if atomic.Casuintptr(&l.key, v, uintptr(mp.nextwaitm)) {
- // Dequeued an M. Wake it.
- semawakeup(mp)
- break
- }
- }
- }
- gp.m.mLockProfile.recordUnlock(l)
- gp.m.locks--
- if gp.m.locks < 0 {
- throw("runtime·unlock: lock count")
- }
- if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
- gp.stackguard0 = stackPreempt
- }
-}
-
// One-time notifications.
func noteclear(n *note) {
n.key = 0
diff --git a/src/runtime/lock_sema_tristate.go b/src/runtime/lock_sema_tristate.go
new file mode 100644
index 0000000000..c1f22c5de1
--- /dev/null
+++ b/src/runtime/lock_sema_tristate.go
@@ -0,0 +1,148 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || netbsd || openbsd || plan9 || solaris || windows || ((dragonfly || freebsd || linux) && goexperiment.spinbitmutex)
+
+package runtime
+
+import (
+ "internal/runtime/atomic"
+ "unsafe"
+)
+
+// This implementation depends on OS-specific implementations of
+//
+// func semacreate(mp *m)
+// Create a semaphore for mp, if it does not already have one.
+//
+// func semasleep(ns int64) int32
+// If ns < 0, acquire m's semaphore and return 0.
+// If ns >= 0, try to acquire m's semaphore for at most ns nanoseconds.
+// Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
+//
+// func semawakeup(mp *m)
+// Wake up mp, which is or will soon be sleeping on its semaphore.
+const (
+ locked uintptr = 1
+
+ active_spin = 4
+ active_spin_cnt = 30
+ passive_spin = 1
+)
+
+// mWaitList is part of the M struct, and holds the list of Ms that are waiting
+// for a particular runtime.mutex.
+//
+// When an M is unable to immediately obtain a lock, it adds itself to the list
+// of Ms waiting for the lock. It does that via this struct's next field,
+// forming a singly-linked list with the mutex's key field pointing to the head
+// of the list.
+type mWaitList struct {
+ next muintptr // next m waiting for lock
+}
+
+func mutexContended(l *mutex) bool {
+ return atomic.Loaduintptr(&l.key) > locked
+}
+
+func lock(l *mutex) {
+ lockWithRank(l, getLockRank(l))
+}
+
+func lock2(l *mutex) {
+ gp := getg()
+ if gp.m.locks < 0 {
+ throw("runtime·lock: lock count")
+ }
+ gp.m.locks++
+
+ // Speculative grab for lock.
+ if atomic.Casuintptr(&l.key, 0, locked) {
+ return
+ }
+ semacreate(gp.m)
+
+ timer := &lockTimer{lock: l}
+ timer.begin()
+ // On uniprocessor's, no point spinning.
+ // On multiprocessors, spin for ACTIVE_SPIN attempts.
+ spin := 0
+ if ncpu > 1 {
+ spin = active_spin
+ }
+Loop:
+ for i := 0; ; i++ {
+ v := atomic.Loaduintptr(&l.key)
+ if v&locked == 0 {
+ // Unlocked. Try to lock.
+ if atomic.Casuintptr(&l.key, v, v|locked) {
+ timer.end()
+ return
+ }
+ i = 0
+ }
+ if i < spin {
+ procyield(active_spin_cnt)
+ } else if i < spin+passive_spin {
+ osyield()
+ } else {
+ // Someone else has it.
+ // l.key points to a linked list of M's waiting
+ // for this lock, chained through m.mWaitList.next.
+ // Queue this M.
+ for {
+ gp.m.mWaitList.next = muintptr(v &^ locked)
+ if atomic.Casuintptr(&l.key, v, uintptr(unsafe.Pointer(gp.m))|locked) {
+ break
+ }
+ v = atomic.Loaduintptr(&l.key)
+ if v&locked == 0 {
+ continue Loop
+ }
+ }
+ if v&locked != 0 {
+ // Queued. Wait.
+ semasleep(-1)
+ i = 0
+ }
+ }
+ }
+}
+
+func unlock(l *mutex) {
+ unlockWithRank(l)
+}
+
+// We might not be holding a p in this code.
+//
+//go:nowritebarrier
+func unlock2(l *mutex) {
+ gp := getg()
+ var mp *m
+ for {
+ v := atomic.Loaduintptr(&l.key)
+ if v == locked {
+ if atomic.Casuintptr(&l.key, locked, 0) {
+ break
+ }
+ } else {
+ // Other M's are waiting for the lock.
+ // Dequeue an M.
+ mp = muintptr(v &^ locked).ptr()
+ if atomic.Casuintptr(&l.key, v, uintptr(mp.mWaitList.next)) {
+ // Dequeued an M. Wake it.
+ semawakeup(mp)
+ break
+ }
+ }
+ }
+ gp.m.mLockProfile.recordUnlock(l)
+ gp.m.locks--
+ if gp.m.locks < 0 {
+ throw("runtime·unlock: lock count")
+ }
+ if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
+ gp.stackguard0 = stackPreempt
+ }
+}
diff --git a/src/runtime/lock_wasip1.go b/src/runtime/lock_wasip1.go
index acfc62acb4..f883841366 100644
--- a/src/runtime/lock_wasip1.go
+++ b/src/runtime/lock_wasip1.go
@@ -19,6 +19,8 @@ const (
active_spin_cnt = 30
)
+type mWaitList struct{}
+
func mutexContended(l *mutex) bool {
return false
}
diff --git a/src/runtime/os_dragonfly.go b/src/runtime/os_dragonfly.go
index 2aeea17755..a02696eb4f 100644
--- a/src/runtime/os_dragonfly.go
+++ b/src/runtime/os_dragonfly.go
@@ -19,7 +19,9 @@ const (
_SIG_SETMASK = 3
)
-type mOS struct{}
+type mOS struct {
+ waitsema uint32 // semaphore for parking on locks
+}
//go:noescape
func lwp_create(param *lwpparams) int32
diff --git a/src/runtime/os_freebsd.go b/src/runtime/os_freebsd.go
index d0d6f14fa0..3187317a76 100644
--- a/src/runtime/os_freebsd.go
+++ b/src/runtime/os_freebsd.go
@@ -10,7 +10,9 @@ import (
"unsafe"
)
-type mOS struct{}
+type mOS struct {
+ waitsema uint32 // semaphore for parking on locks
+}
//go:noescape
func thr_new(param *thrparam, size int32) int32
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index 8f5cf6db8a..8b3c4d0ecc 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -35,6 +35,8 @@ type mOS struct {
// This is a pointer to a chunk of memory allocated with a special
// mmap invocation in vgetrandomGetState().
vgetrandomState uintptr
+
+ waitsema uint32 // semaphore for parking on locks
}
//go:noescape
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 2d668637b1..b8c710a816 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -571,7 +571,7 @@ type m struct {
createstack [32]uintptr // stack that created this thread, it's used for StackRecord.Stack0, so it must align with it.
lockedExt uint32 // tracking for external LockOSThread
lockedInt uint32 // tracking for internal lockOSThread
- nextwaitm muintptr // next m waiting for lock
+ mWaitList mWaitList // list of runtime lock waiters
mLockProfile mLockProfile // fields relating to runtime.lock contention
profStack []uintptr // used for memory/block/mutex stack traces