aboutsummaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/export_test.go2
-rw-r--r--src/runtime/mprof.go2
-rw-r--r--src/runtime/pprof/pprof_test.go74
-rw-r--r--src/runtime/runtime2.go9
-rw-r--r--src/runtime/sema.go54
5 files changed, 115 insertions, 26 deletions
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index a89220e0dd..4168705f2a 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -1330,7 +1330,7 @@ func (t *SemTable) Enqueue(addr *uint32) {
//
// Returns true if there actually was a waiter to be dequeued.
func (t *SemTable) Dequeue(addr *uint32) bool {
- s, _ := t.semTable.rootFor(addr).dequeue(addr)
+ s, _, _ := t.semTable.rootFor(addr).dequeue(addr)
if s != nil {
releaseSudog(s)
return true
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index 308ebaebe6..45f107722a 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -553,8 +553,6 @@ func mutexevent(cycles int64, skip int) {
cycles = 0
}
rate := int64(atomic.Load64(&mutexprofilerate))
- // TODO(pjw): measure impact of always calling fastrand vs using something
- // like malloc.go:nextSample()
if rate > 0 && int64(fastrand())%rate == 0 {
saveblockevent(cycles, rate, skip+1, mutexProfile)
}
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index 1ade860441..9480240615 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -1023,7 +1023,7 @@ func containsStack(got [][]string, want []string) bool {
// awaitBlockedGoroutine spins on runtime.Gosched until a runtime stack dump
// shows a goroutine in the given state with a stack frame in
// runtime/pprof.<fName>.
-func awaitBlockedGoroutine(t *testing.T, state, fName string) {
+func awaitBlockedGoroutine(t *testing.T, state, fName string, count int) {
re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime/pprof\.%s`, regexp.QuoteMeta(state), fName)
r := regexp.MustCompile(re)
@@ -1047,7 +1047,7 @@ func awaitBlockedGoroutine(t *testing.T, state, fName string) {
buf = make([]byte, 2*len(buf))
continue
}
- if r.Match(buf[:n]) {
+ if len(r.FindAll(buf[:n], -1)) >= count {
return
}
}
@@ -1056,7 +1056,7 @@ func awaitBlockedGoroutine(t *testing.T, state, fName string) {
func blockChanRecv(t *testing.T) {
c := make(chan bool)
go func() {
- awaitBlockedGoroutine(t, "chan receive", "blockChanRecv")
+ awaitBlockedGoroutine(t, "chan receive", "blockChanRecv", 1)
c <- true
}()
<-c
@@ -1065,7 +1065,7 @@ func blockChanRecv(t *testing.T) {
func blockChanSend(t *testing.T) {
c := make(chan bool)
go func() {
- awaitBlockedGoroutine(t, "chan send", "blockChanSend")
+ awaitBlockedGoroutine(t, "chan send", "blockChanSend", 1)
<-c
}()
c <- true
@@ -1074,7 +1074,7 @@ func blockChanSend(t *testing.T) {
func blockChanClose(t *testing.T) {
c := make(chan bool)
go func() {
- awaitBlockedGoroutine(t, "chan receive", "blockChanClose")
+ awaitBlockedGoroutine(t, "chan receive", "blockChanClose", 1)
close(c)
}()
<-c
@@ -1086,7 +1086,7 @@ func blockSelectRecvAsync(t *testing.T) {
c2 := make(chan bool, 1)
go func() {
for i := 0; i < numTries; i++ {
- awaitBlockedGoroutine(t, "select", "blockSelectRecvAsync")
+ awaitBlockedGoroutine(t, "select", "blockSelectRecvAsync", 1)
c <- true
}
}()
@@ -1102,7 +1102,7 @@ func blockSelectSendSync(t *testing.T) {
c := make(chan bool)
c2 := make(chan bool)
go func() {
- awaitBlockedGoroutine(t, "select", "blockSelectSendSync")
+ awaitBlockedGoroutine(t, "select", "blockSelectSendSync", 1)
<-c
}()
select {
@@ -1115,7 +1115,7 @@ func blockMutex(t *testing.T) {
var mu sync.Mutex
mu.Lock()
go func() {
- awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex")
+ awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex", 1)
mu.Unlock()
}()
// Note: Unlock releases mu before recording the mutex event,
@@ -1125,12 +1125,36 @@ func blockMutex(t *testing.T) {
mu.Lock()
}
+func blockMutexN(t *testing.T, n int, d time.Duration) {
+ var wg sync.WaitGroup
+ var mu sync.Mutex
+ mu.Lock()
+ go func() {
+ awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex", n)
+ time.Sleep(d)
+ mu.Unlock()
+ }()
+ // Note: Unlock releases mu before recording the mutex event,
+ // so it's theoretically possible for this to proceed and
+ // capture the profile before the event is recorded. As long
+ // as this is blocked before the unlock happens, it's okay.
+ for i := 0; i < n; i++ {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ mu.Lock()
+ mu.Unlock()
+ }()
+ }
+ wg.Wait()
+}
+
func blockCond(t *testing.T) {
var mu sync.Mutex
c := sync.NewCond(&mu)
mu.Lock()
go func() {
- awaitBlockedGoroutine(t, "sync.Cond.Wait", "blockCond")
+ awaitBlockedGoroutine(t, "sync.Cond.Wait", "blockCond", 1)
mu.Lock()
c.Signal()
mu.Unlock()
@@ -1217,7 +1241,11 @@ func TestMutexProfile(t *testing.T) {
t.Fatalf("need MutexProfileRate 0, got %d", old)
}
- blockMutex(t)
+ const (
+ N = 100
+ D = 100 * time.Millisecond
+ )
+ blockMutexN(t, N, D)
t.Run("debug=1", func(t *testing.T) {
var w strings.Builder
@@ -1230,15 +1258,11 @@ func TestMutexProfile(t *testing.T) {
}
prof = strings.Trim(prof, "\n")
lines := strings.Split(prof, "\n")
- if len(lines) != 6 {
- t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof)
- }
if len(lines) < 6 {
- return
+ t.Fatalf("expected >=6 lines, got %d %q\n%s", len(lines), prof, prof)
}
// checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931"
r2 := `^\d+ \d+ @(?: 0x[[:xdigit:]]+)+`
- //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$"
if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok {
t.Errorf("%q didn't match %q", lines[3], r2)
}
@@ -1263,12 +1287,30 @@ func TestMutexProfile(t *testing.T) {
stks := stacks(p)
for _, want := range [][]string{
- {"sync.(*Mutex).Unlock", "runtime/pprof.blockMutex.func1"},
+ {"sync.(*Mutex).Unlock", "runtime/pprof.blockMutexN.func1"},
} {
if !containsStack(stks, want) {
t.Errorf("No matching stack entry for %+v", want)
}
}
+
+ i := 0
+ for ; i < len(p.SampleType); i++ {
+ if p.SampleType[i].Unit == "nanoseconds" {
+ break
+ }
+ }
+ if i >= len(p.SampleType) {
+ t.Fatalf("profile did not contain nanoseconds sample")
+ }
+ total := int64(0)
+ for _, s := range p.Sample {
+ total += s.Value[i]
+ }
+ d := time.Duration(total)
+ if d < N*D*9/10 || d > N*D*2 { // want N*D but allow [0.9,2.0]*that.
+ t.Fatalf("profile samples total %v, want %v", d, N*D)
+ }
})
}
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 5017a7a80a..2a02e1fb3b 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -342,7 +342,7 @@ type gobuf struct {
bp uintptr // for framepointer-enabled architectures
}
-// sudog represents a g in a wait list, such as for sending/receiving
+// sudog (pseudo-g) represents a g in a wait list, such as for sending/receiving
// on a channel.
//
// sudog is necessary because the g ↔ synchronization object relation
@@ -382,6 +382,13 @@ type sudog struct {
// because c was closed.
success bool
+ // waiters is a count of semaRoot waiting list other than head of list,
+ // clamped to a uint16 to fit in unused space.
+ // Only meaningful at the head of the list.
+ // (If we wanted to be overly clever, we could store a high 16 bits
+ // in the second entry in the list.)
+ waiters uint16
+
parent *sudog // semaRoot binary tree
waitlink *sudog // g.waiting list or semaRoot
waittail *sudog // semaRoot
diff --git a/src/runtime/sema.go b/src/runtime/sema.go
index d0a81170c3..3b6874ca11 100644
--- a/src/runtime/sema.go
+++ b/src/runtime/sema.go
@@ -191,7 +191,7 @@ func semrelease1(addr *uint32, handoff bool, skipframes int) {
unlock(&root.lock)
return
}
- s, t0 := root.dequeue(addr)
+ s, t0, tailtime := root.dequeue(addr)
if s != nil {
root.nwait.Add(-1)
}
@@ -199,7 +199,28 @@ func semrelease1(addr *uint32, handoff bool, skipframes int) {
if s != nil { // May be slow or even yield, so unlock first
acquiretime := s.acquiretime
if acquiretime != 0 {
- mutexevent(t0-acquiretime, 3+skipframes)
+ // Charge contention that this (delayed) unlock caused.
+ // If there are N more goroutines waiting beyond the
+ // one that's waking up, charge their delay as well, so that
+ // contention holding up many goroutines shows up as
+ // more costly than contention holding up a single goroutine.
+ // It would take O(N) time to calculate how long each goroutine
+ // has been waiting, so instead we charge avg(head-wait, tail-wait)*N.
+ // head-wait is the longest wait and tail-wait is the shortest.
+ // (When we do a lifo insertion, we preserve this property by
+ // copying the old head's acquiretime into the inserted new head.
+ // In that case the overall average may be slightly high, but that's fine:
+ // the average of the ends is only an approximation to the actual
+ // average anyway.)
+ // The root.dequeue above changed the head and tail acquiretime
+ // to the current time, so the next unlock will not re-count this contention.
+ dt0 := t0 - acquiretime
+ dt := dt0
+ if s.waiters != 0 {
+ dtail := t0 - tailtime
+ dt += (dtail + dt0) / 2 * int64(s.waiters)
+ }
+ mutexevent(dt, 3+skipframes)
}
if s.ticket != 0 {
throw("corrupted semaphore ticket")
@@ -248,6 +269,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
s.elem = unsafe.Pointer(addr)
s.next = nil
s.prev = nil
+ s.waiters = 0
var last *sudog
pt := &root.treap
@@ -258,7 +280,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
// Substitute s in t's place in treap.
*pt = s
s.ticket = t.ticket
- s.acquiretime = t.acquiretime
+ s.acquiretime = t.acquiretime // preserve head acquiretime as oldest time
s.parent = t.parent
s.prev = t.prev
s.next = t.next
@@ -274,6 +296,10 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
if s.waittail == nil {
s.waittail = t
}
+ s.waiters = t.waiters
+ if s.waiters+1 != 0 {
+ s.waiters++
+ }
t.parent = nil
t.prev = nil
t.next = nil
@@ -287,6 +313,9 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
}
t.waittail = s
s.waitlink = nil
+ if t.waiters+1 != 0 {
+ t.waiters++
+ }
}
return
}
@@ -330,7 +359,10 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
// in semaRoot blocked on addr.
// If the sudog was being profiled, dequeue returns the time
// at which it was woken up as now. Otherwise now is 0.
-func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now int64) {
+// If there are additional entries in the wait list, dequeue
+// returns tailtime set to the last entry's acquiretime.
+// Otherwise tailtime is found.acquiretime.
+func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) {
ps := &root.treap
s := *ps
for ; s != nil; s = *ps {
@@ -343,7 +375,7 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now int64) {
ps = &s.next
}
}
- return nil, 0
+ return nil, 0, 0
Found:
now = int64(0)
@@ -368,7 +400,16 @@ Found:
} else {
t.waittail = nil
}
+ t.waiters = s.waiters
+ if t.waiters > 1 {
+ t.waiters--
+ }
+ // Set head and tail acquire time to 'now',
+ // because the caller will take care of charging
+ // the delays before now for all entries in the list.
t.acquiretime = now
+ tailtime = s.waittail.acquiretime
+ s.waittail.acquiretime = now
s.waitlink = nil
s.waittail = nil
} else {
@@ -390,13 +431,14 @@ Found:
} else {
root.treap = nil
}
+ tailtime = s.acquiretime
}
s.parent = nil
s.elem = nil
s.next = nil
s.prev = nil
s.ticket = 0
- return s, now
+ return s, now, tailtime
}
// rotateLeft rotates the tree rooted at node x.