From 8ebc58452af3a586a3da1f68725bc83c78d4b073 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 29 Jul 2020 20:25:05 +0000 Subject: runtime: delineate which memstats are system stats with a type This change modifies the type of several mstats fields to be a new type: sysMemStat. This type has the same structure as the fields used to have. The purpose of this change is to make it very clear which stats may be used in various functions for accounting (usually the platform-specific sys* functions, but there are others). Currently there's an implicit understanding that the *uint64 value passed to these functions is some kind of statistic whose value is atomically managed. This understanding isn't inherently problematic, but we're about to change how some stats (which currently use mSysStatInc and mSysStatDec) work, so we want to make it very clear what the various requirements are around "sysStat". This change also removes mSysStatInc and mSysStatDec in favor of a method on sysMemStat. Note that those two functions were originally written the way they were because atomic 64-bit adds required a valid G on ARM, but this hasn't been the case for a very long time (since golang.org/cl/14204, but even before then it wasn't clear if mutexes required a valid G anymore). Today we implement 64-bit adds on ARM with a spinlock table. Change-Id: I4e9b37cf14afc2ae20cf736e874eb0064af086d7 Reviewed-on: https://go-review.googlesource.com/c/go/+/246971 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/heapdump.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/runtime/heapdump.go') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 4c35309211..495ecc5164 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -548,20 +548,20 @@ func dumpmemstats() { dumpint(memstats.nmalloc) dumpint(memstats.nfree) dumpint(memstats.heap_alloc) - dumpint(memstats.heap_sys) + dumpint(memstats.heap_sys.load()) dumpint(memstats.heap_idle) dumpint(memstats.heap_inuse) dumpint(memstats.heap_released) dumpint(memstats.heap_objects) dumpint(memstats.stacks_inuse) - dumpint(memstats.stacks_sys) + dumpint(memstats.stacks_sys.load()) dumpint(memstats.mspan_inuse) - dumpint(memstats.mspan_sys) + dumpint(memstats.mspan_sys.load()) dumpint(memstats.mcache_inuse) - dumpint(memstats.mcache_sys) - dumpint(memstats.buckhash_sys) - dumpint(memstats.gc_sys) - dumpint(memstats.other_sys) + dumpint(memstats.mcache_sys.load()) + dumpint(memstats.buckhash_sys.load()) + dumpint(memstats.gc_sys.load()) + dumpint(memstats.other_sys.load()) dumpint(memstats.next_gc) dumpint(memstats.last_gc_unix) dumpint(memstats.pause_total_ns) -- cgit v1.3 From ad863ba32a2ede207d708fa15897e9de1d14dd87 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 3 Aug 2020 19:23:30 +0000 Subject: runtime: break down memstats.gc_sys This change breaks apart gc_sys into three distinct pieces. Two of those pieces are pieces which come from heap_sys since they're allocated from the page heap. The rest comes from memory mapped from e.g. persistentalloc which better fits the purpose of a sysMemStat. Also, rename gc_sys to gcMiscSys. Change-Id: I098789170052511e7b31edbcdc9a53e5c24573f7 Reviewed-on: https://go-review.googlesource.com/c/go/+/246973 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/heapdump.go | 5 ++++- src/runtime/malloc.go | 6 +++--- src/runtime/mcheckmark.go | 2 +- src/runtime/mfinal.go | 2 +- src/runtime/mheap.go | 16 ++++++++++------ src/runtime/mspanset.go | 4 ++-- src/runtime/mstats.go | 31 ++++++++++++++++++------------- 7 files changed, 39 insertions(+), 27 deletions(-) (limited to 'src/runtime/heapdump.go') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 495ecc5164..eed47930f0 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -540,6 +540,9 @@ func dumpms() { } func dumpmemstats() { + // These ints should be identical to the exported + // MemStats structure and should be ordered the same + // way too. dumpint(tagMemStats) dumpint(memstats.alloc) dumpint(memstats.total_alloc) @@ -560,7 +563,7 @@ func dumpmemstats() { dumpint(memstats.mcache_inuse) dumpint(memstats.mcache_sys.load()) dumpint(memstats.buckhash_sys.load()) - dumpint(memstats.gc_sys.load()) + dumpint(memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse) dumpint(memstats.other_sys.load()) dumpint(memstats.next_gc) dumpint(memstats.last_gc_unix) diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 27d678d917..ee22bad58c 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -743,9 +743,9 @@ mapped: throw("arena already initialized") } var r *heapArena - r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gcMiscSys)) if r == nil { - r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gcMiscSys)) if r == nil { throw("out of memory allocating heap arena metadata") } @@ -757,7 +757,7 @@ mapped: if size == 0 { size = physPageSize } - newArray := (*notInHeap)(persistentalloc(size, sys.PtrSize, &memstats.gc_sys)) + newArray := (*notInHeap)(persistentalloc(size, sys.PtrSize, &memstats.gcMiscSys)) if newArray == nil { throw("out of memory allocating allArenas") } diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go index 1fd8e4e78f..c0b028d715 100644 --- a/src/runtime/mcheckmark.go +++ b/src/runtime/mcheckmark.go @@ -41,7 +41,7 @@ func startCheckmarks() { if bitmap == nil { // Allocate bitmap on first use. - bitmap = (*checkmarksMap)(persistentalloc(unsafe.Sizeof(*bitmap), 0, &memstats.gc_sys)) + bitmap = (*checkmarksMap)(persistentalloc(unsafe.Sizeof(*bitmap), 0, &memstats.gcMiscSys)) if bitmap == nil { throw("out of memory allocating checkmarks bitmap") } diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go index 6676ae6736..6ec5133be0 100644 --- a/src/runtime/mfinal.go +++ b/src/runtime/mfinal.go @@ -88,7 +88,7 @@ func queuefinalizer(p unsafe.Pointer, fn *funcval, nret uintptr, fint *_type, ot lock(&finlock) if finq == nil || finq.cnt == uint32(len(finq.fin)) { if finc == nil { - finc = (*finblock)(persistentalloc(_FinBlockSize, 0, &memstats.gc_sys)) + finc = (*finblock)(persistentalloc(_FinBlockSize, 0, &memstats.gcMiscSys)) finc.alllink = allfin allfin = finc if finptrmask[0] == 0 { diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 27c1bfbcf1..1624a04b9d 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -713,7 +713,7 @@ func (h *mheap) init() { h.central[i].mcentral.init(spanClass(i)) } - h.pages.init(&h.lock, &memstats.gc_sys) + h.pages.init(&h.lock, &memstats.gcMiscSys) } // reclaim sweeps and reclaims at least npage pages into the heap. @@ -1230,8 +1230,10 @@ HaveSpan: atomic.Xadd64(&memstats.heap_inuse, int64(nbytes)) case spanAllocStack: atomic.Xadd64(&memstats.stacks_inuse, int64(nbytes)) - case spanAllocPtrScalarBits, spanAllocWorkBuf: - memstats.gc_sys.add(int64(nbytes)) + case spanAllocWorkBuf: + atomic.Xadd64(&memstats.gcWorkBufInUse, int64(nbytes)) + case spanAllocPtrScalarBits: + atomic.Xadd64(&memstats.gcProgPtrScalarBitsInUse, int64(nbytes)) } if typ.manual() { // Manually managed memory doesn't count toward heap_sys. @@ -1406,8 +1408,10 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { atomic.Xadd64(&memstats.heap_inuse, -int64(nbytes)) case spanAllocStack: atomic.Xadd64(&memstats.stacks_inuse, -int64(nbytes)) - case spanAllocPtrScalarBits, spanAllocWorkBuf: - memstats.gc_sys.add(-int64(nbytes)) + case spanAllocWorkBuf: + atomic.Xadd64(&memstats.gcWorkBufInUse, -int64(nbytes)) + case spanAllocPtrScalarBits: + atomic.Xadd64(&memstats.gcProgPtrScalarBitsInUse, -int64(nbytes)) } if typ.manual() { // Manually managed memory doesn't count toward heap_sys, so add it back. @@ -1956,7 +1960,7 @@ func newArenaMayUnlock() *gcBitsArena { var result *gcBitsArena if gcBitsArenas.free == nil { unlock(&gcBitsArenas.lock) - result = (*gcBitsArena)(sysAlloc(gcBitsChunkBytes, &memstats.gc_sys)) + result = (*gcBitsArena)(sysAlloc(gcBitsChunkBytes, &memstats.gcMiscSys)) if result == nil { throw("runtime: cannot allocate memory") } diff --git a/src/runtime/mspanset.go b/src/runtime/mspanset.go index 490eed4549..10d2596c38 100644 --- a/src/runtime/mspanset.go +++ b/src/runtime/mspanset.go @@ -102,7 +102,7 @@ retry: if newCap == 0 { newCap = spanSetInitSpineCap } - newSpine := persistentalloc(newCap*sys.PtrSize, cpu.CacheLineSize, &memstats.gc_sys) + newSpine := persistentalloc(newCap*sys.PtrSize, cpu.CacheLineSize, &memstats.gcMiscSys) if b.spineCap != 0 { // Blocks are allocated off-heap, so // no write barriers. @@ -283,7 +283,7 @@ func (p *spanSetBlockAlloc) alloc() *spanSetBlock { if s := (*spanSetBlock)(p.stack.pop()); s != nil { return s } - return (*spanSetBlock)(persistentalloc(unsafe.Sizeof(spanSetBlock{}), cpu.CacheLineSize, &memstats.gc_sys)) + return (*spanSetBlock)(persistentalloc(unsafe.Sizeof(spanSetBlock{}), cpu.CacheLineSize, &memstats.gcMiscSys)) } // free returns a spanSetBlock back to the pool. diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 466f33836c..967fe6e2be 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -44,15 +44,17 @@ type mstats struct { // Statistics about allocation of low-level fixed-size structures. // Protected by FixAlloc locks. - stacks_inuse uint64 // bytes in manually-managed stack spans; updated atomically or during STW - stacks_sys sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys - mspan_inuse uint64 // mspan structures - mspan_sys sysMemStat - mcache_inuse uint64 // mcache structures - mcache_sys sysMemStat - buckhash_sys sysMemStat // profiling bucket hash table - gc_sys sysMemStat // updated atomically or during STW - other_sys sysMemStat // updated atomically or during STW + stacks_inuse uint64 // bytes in manually-managed stack spans; updated atomically or during STW + stacks_sys sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys + mspan_inuse uint64 // mspan structures + mspan_sys sysMemStat + mcache_inuse uint64 // mcache structures + mcache_sys sysMemStat + buckhash_sys sysMemStat // profiling bucket hash table + gcWorkBufInUse uint64 // updated atomically or during STW + gcProgPtrScalarBitsInUse uint64 // updated atomically or during STW + gcMiscSys sysMemStat // updated atomically or during STW + other_sys sysMemStat // updated atomically or during STW // Statistics about the garbage collector. @@ -472,7 +474,10 @@ func readmemstats_m(stats *MemStats) { stats.MCacheInuse = memstats.mcache_inuse stats.MCacheSys = memstats.mcache_sys.load() stats.BuckHashSys = memstats.buckhash_sys.load() - stats.GCSys = memstats.gc_sys.load() + // MemStats defines GCSys as an aggregate of all memory related + // to the memory management system, but we track this memory + // at a more granular level in the runtime. + stats.GCSys = memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse stats.OtherSys = memstats.other_sys.load() stats.NextGC = memstats.next_gc stats.LastGC = memstats.last_gc_unix @@ -557,11 +562,11 @@ func updatememstats() { memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse) memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse) memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() + - memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gc_sys.load() + + memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() + memstats.other_sys.load() - // We also count stacks_inuse as sys memory. - memstats.sys += memstats.stacks_inuse + // We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory. + memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse // Calculate memory allocator stats. // During program execution we only count number of frees and amount of freed memory. -- cgit v1.3 From c5dea8f38726572ddc161e5d169a453639edb7b1 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 3 Aug 2020 19:27:59 +0000 Subject: runtime: remove memstats.heap_idle This statistic is updated in many places but for MemStats may be computed from existing statistics. Specifically by definition heap_idle = heap_sys - heap_inuse since heap_sys is all memory allocated from the OS for use in the heap minus memory used for non-heap purposes. heap_idle is almost the same (since it explicitly includes memory that *could* be used for non-heap purposes) but also doesn't include memory that's actually used to hold heap objects. Although it has some utility as a sanity check, it complicates accounting and we want fewer, orthogonal statistics for upcoming metrics changes, so just drop it. Change-Id: I40af54a38e335f43249f6e218f35088bfd4380d1 Reviewed-on: https://go-review.googlesource.com/c/go/+/246974 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/heapdump.go | 2 +- src/runtime/mheap.go | 3 --- src/runtime/mstats.go | 19 +++++++++++++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'src/runtime/heapdump.go') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index eed47930f0..f96475e848 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -552,7 +552,7 @@ func dumpmemstats() { dumpint(memstats.nfree) dumpint(memstats.heap_alloc) dumpint(memstats.heap_sys.load()) - dumpint(memstats.heap_idle) + dumpint(memstats.heap_sys.load() - memstats.heap_inuse) dumpint(memstats.heap_inuse) dumpint(memstats.heap_released) dumpint(memstats.heap_objects) diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 1624a04b9d..87d2fd495b 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1239,7 +1239,6 @@ HaveSpan: // Manually managed memory doesn't count toward heap_sys. memstats.heap_sys.add(-int64(nbytes)) } - atomic.Xadd64(&memstats.heap_idle, -int64(nbytes)) // Publish the span in various locations. @@ -1317,7 +1316,6 @@ func (h *mheap) grow(npage uintptr) bool { // size which is always > physPageSize, so its safe to // just add directly to heap_released. atomic.Xadd64(&memstats.heap_released, int64(asize)) - atomic.Xadd64(&memstats.heap_idle, int64(asize)) // Recalculate nBase. // We know this won't overflow, because sysAlloc returned @@ -1417,7 +1415,6 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { // Manually managed memory doesn't count toward heap_sys, so add it back. memstats.heap_sys.add(int64(nbytes)) } - atomic.Xadd64(&memstats.heap_idle, int64(nbytes)) // Mark the space as free. h.pages.free(s.base(), s.npages) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 967fe6e2be..43f74273f7 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -34,7 +34,6 @@ type mstats struct { // in manually-managed spans. heap_alloc uint64 // bytes allocated and not yet freed (same as alloc above) heap_sys sysMemStat // virtual address space obtained from system for GC'd heap - heap_idle uint64 // bytes in idle spans heap_inuse uint64 // bytes in mSpanInUse spans heap_released uint64 // bytes released to the os @@ -461,7 +460,23 @@ func readmemstats_m(stats *MemStats) { stats.Frees = memstats.nfree stats.HeapAlloc = memstats.heap_alloc stats.HeapSys = memstats.heap_sys.load() - stats.HeapIdle = memstats.heap_idle + // By definition, HeapIdle is memory that was mapped + // for the heap but is not currently used to hold heap + // objects. It also specifically is memory that can be + // used for other purposes, like stacks, but this memory + // is subtracted out of HeapSys before it makes that + // transition. Put another way: + // + // heap_sys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes + // heap_idle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose + // + // or + // + // heap_sys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse + // heap_idle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heap_inuse + // + // => heap_idle = heap_sys - heap_inuse + stats.HeapIdle = memstats.heap_sys.load() - memstats.heap_inuse stats.HeapInuse = memstats.heap_inuse stats.HeapReleased = memstats.heap_released stats.HeapObjects = memstats.heap_objects -- cgit v1.3 From ae585ee52c2437bfd0e955ad6fc8911bf292f51d Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 3 Aug 2020 19:31:23 +0000 Subject: runtime: remove memstats.heap_alloc memstats.heap_alloc is 100% a duplicate and unnecessary copy of memstats.alloc which exists because MemStats used to be populated from memstats via a memmove. Change-Id: I995489f61be39786e573b8494a8ab6d4ea8bed9c Reviewed-on: https://go-review.googlesource.com/c/go/+/246975 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/heapdump.go | 2 +- src/runtime/mstats.go | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'src/runtime/heapdump.go') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index f96475e848..6fcd9746af 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -550,7 +550,7 @@ func dumpmemstats() { dumpint(memstats.nlookup) dumpint(memstats.nmalloc) dumpint(memstats.nfree) - dumpint(memstats.heap_alloc) + dumpint(memstats.alloc) dumpint(memstats.heap_sys.load()) dumpint(memstats.heap_sys.load() - memstats.heap_inuse) dumpint(memstats.heap_inuse) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 43f74273f7..a6e38d1c1b 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -32,7 +32,6 @@ type mstats struct { // // Like MemStats, heap_sys and heap_inuse do not count memory // in manually-managed spans. - heap_alloc uint64 // bytes allocated and not yet freed (same as alloc above) heap_sys sysMemStat // virtual address space obtained from system for GC'd heap heap_inuse uint64 // bytes in mSpanInUse spans heap_released uint64 // bytes released to the os @@ -112,11 +111,10 @@ type mstats struct { // heap_live is the number of bytes considered live by the GC. // That is: retained by the most recent GC plus allocated - // since then. heap_live <= heap_alloc, since heap_alloc - // includes unmarked objects that have not yet been swept (and - // hence goes up as we allocate and down as we sweep) while - // heap_live excludes these objects (and hence only goes up - // between GCs). + // since then. heap_live <= alloc, since alloc includes unmarked + // objects that have not yet been swept (and hence goes up as we + // allocate and down as we sweep) while heap_live excludes these + // objects (and hence only goes up between GCs). // // This is updated atomically without locking. To reduce // contention, this is updated only when obtaining a span from @@ -458,7 +456,7 @@ func readmemstats_m(stats *MemStats) { stats.Sys = memstats.sys stats.Mallocs = memstats.nmalloc stats.Frees = memstats.nfree - stats.HeapAlloc = memstats.heap_alloc + stats.HeapAlloc = memstats.alloc stats.HeapSys = memstats.heap_sys.load() // By definition, HeapIdle is memory that was mapped // for the heap but is not currently used to hold heap @@ -639,7 +637,6 @@ func updatememstats() { // Calculate derived stats. memstats.total_alloc = totalAlloc memstats.alloc = totalAlloc - totalFree - memstats.heap_alloc = memstats.alloc memstats.heap_objects = memstats.nmalloc - memstats.nfree } -- cgit v1.3 From 32d0eaa44e2d83cff6f0c1fa3d58af7627f3cd99 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 17 Sep 2020 21:19:28 +0000 Subject: runtime: implement dumpmemstats in terms of readmemstats_m Since MemStats is now populated directly and some values are derived, avoid duplicating the logic by instead populating the heap dump directly from MemStats (external version) instead of memstats (runtime internal version). Change-Id: I0bec96bfa02d2ffd1b56475779c124a760e64238 Reviewed-on: https://go-review.googlesource.com/c/go/+/255817 Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/heapdump.go | 76 ++++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 32 deletions(-) (limited to 'src/runtime/heapdump.go') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 6fcd9746af..33e224d587 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -20,8 +20,19 @@ import ( func runtime_debug_WriteHeapDump(fd uintptr) { stopTheWorld("write heap dump") + // Keep m on this G's stack instead of the system stack. + // Both readmemstats_m and writeheapdump_m have pretty large + // peak stack depths and we risk blowing the system stack. + // This is safe because the world is stopped, so we don't + // need to worry about anyone shrinking and therefore moving + // our stack. + var m MemStats systemstack(func() { - writeheapdump_m(fd) + // Call readmemstats_m here instead of deeper in + // writeheapdump_m because we might blow the system stack + // otherwise. + readmemstats_m(&m) + writeheapdump_m(fd, &m) }) startTheWorld() @@ -539,39 +550,40 @@ func dumpms() { } } -func dumpmemstats() { +//go:systemstack +func dumpmemstats(m *MemStats) { // These ints should be identical to the exported // MemStats structure and should be ordered the same // way too. dumpint(tagMemStats) - dumpint(memstats.alloc) - dumpint(memstats.total_alloc) - dumpint(memstats.sys) - dumpint(memstats.nlookup) - dumpint(memstats.nmalloc) - dumpint(memstats.nfree) - dumpint(memstats.alloc) - dumpint(memstats.heap_sys.load()) - dumpint(memstats.heap_sys.load() - memstats.heap_inuse) - dumpint(memstats.heap_inuse) - dumpint(memstats.heap_released) - dumpint(memstats.heap_objects) - dumpint(memstats.stacks_inuse) - dumpint(memstats.stacks_sys.load()) - dumpint(memstats.mspan_inuse) - dumpint(memstats.mspan_sys.load()) - dumpint(memstats.mcache_inuse) - dumpint(memstats.mcache_sys.load()) - dumpint(memstats.buckhash_sys.load()) - dumpint(memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse) - dumpint(memstats.other_sys.load()) - dumpint(memstats.next_gc) - dumpint(memstats.last_gc_unix) - dumpint(memstats.pause_total_ns) + dumpint(m.Alloc) + dumpint(m.TotalAlloc) + dumpint(m.Sys) + dumpint(m.Lookups) + dumpint(m.Mallocs) + dumpint(m.Frees) + dumpint(m.HeapAlloc) + dumpint(m.HeapSys) + dumpint(m.HeapIdle) + dumpint(m.HeapInuse) + dumpint(m.HeapReleased) + dumpint(m.HeapObjects) + dumpint(m.StackInuse) + dumpint(m.StackSys) + dumpint(m.MSpanInuse) + dumpint(m.MSpanSys) + dumpint(m.MCacheInuse) + dumpint(m.MCacheSys) + dumpint(m.BuckHashSys) + dumpint(m.GCSys) + dumpint(m.OtherSys) + dumpint(m.NextGC) + dumpint(m.LastGC) + dumpint(m.PauseTotalNs) for i := 0; i < 256; i++ { - dumpint(memstats.pause_ns[i]) + dumpint(m.PauseNs[i]) } - dumpint(uint64(memstats.numgc)) + dumpint(uint64(m.NumGC)) } func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, frees uintptr) { @@ -642,7 +654,7 @@ func dumpmemprof() { var dumphdr = []byte("go1.7 heap dump\n") -func mdump() { +func mdump(m *MemStats) { // make sure we're done sweeping for _, s := range mheap_.allspans { if s.state.get() == mSpanInUse { @@ -657,13 +669,13 @@ func mdump() { dumpgs() dumpms() dumproots() - dumpmemstats() + dumpmemstats(m) dumpmemprof() dumpint(tagEOF) flush() } -func writeheapdump_m(fd uintptr) { +func writeheapdump_m(fd uintptr, m *MemStats) { _g_ := getg() casgstatus(_g_.m.curg, _Grunning, _Gwaiting) _g_.waitreason = waitReasonDumpingHeap @@ -677,7 +689,7 @@ func writeheapdump_m(fd uintptr) { dumpfd = fd // Call dump routine. - mdump() + mdump(m) // Reset dump file. dumpfd = 0 -- cgit v1.3 From 6abbfc17c255c07134a69c3ca305231db80530ec Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 28 Oct 2020 18:06:05 -0400 Subject: runtime: add world-stopped assertions Stopping the world is an implicit lock for many operations, so we should assert the world is stopped in functions that require it. This is enabled along with the rest of lock ranking, though it is a bit orthogonal and likely cheap enough to enable all the time should we choose. Requiring a lock _or_ world stop is common, so that can be expressed as well. Updates #40677 Change-Id: If0a58544f4251d367f73c4120c9d39974c6cd091 Reviewed-on: https://go-review.googlesource.com/c/go/+/248577 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Austin Clements Trust: Michael Pratt --- src/runtime/heapdump.go | 15 +++++++++ src/runtime/lockrank_off.go | 16 +++++++++ src/runtime/lockrank_on.go | 79 +++++++++++++++++++++++++++++++++++++++++++++ src/runtime/mcheckmark.go | 2 ++ src/runtime/mgc.go | 2 ++ src/runtime/mgcmark.go | 4 +++ src/runtime/mgcsweep.go | 2 ++ src/runtime/mstats.go | 14 ++++++-- src/runtime/proc.go | 14 ++++++++ 9 files changed, 145 insertions(+), 3 deletions(-) (limited to 'src/runtime/heapdump.go') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 33e224d587..2d531571aa 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -431,6 +431,9 @@ func finq_callback(fn *funcval, obj unsafe.Pointer, nret uintptr, fint *_type, o } func dumproots() { + // To protect mheap_.allspans. + assertWorldStopped() + // TODO(mwhudson): dump datamask etc from all objects // data segment dumpint(tagData) @@ -468,6 +471,9 @@ func dumproots() { var freemark [_PageSize / 8]bool func dumpobjs() { + // To protect mheap_.allspans. + assertWorldStopped() + for _, s := range mheap_.allspans { if s.state.get() != mSpanInUse { continue @@ -552,6 +558,8 @@ func dumpms() { //go:systemstack func dumpmemstats(m *MemStats) { + assertWorldStopped() + // These ints should be identical to the exported // MemStats structure and should be ordered the same // way too. @@ -634,6 +642,9 @@ func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, } func dumpmemprof() { + // To protect mheap_.allspans. + assertWorldStopped() + iterate_memprof(dumpmemprof_callback) for _, s := range mheap_.allspans { if s.state.get() != mSpanInUse { @@ -655,6 +666,8 @@ func dumpmemprof() { var dumphdr = []byte("go1.7 heap dump\n") func mdump(m *MemStats) { + assertWorldStopped() + // make sure we're done sweeping for _, s := range mheap_.allspans { if s.state.get() == mSpanInUse { @@ -676,6 +689,8 @@ func mdump(m *MemStats) { } func writeheapdump_m(fd uintptr, m *MemStats) { + assertWorldStopped() + _g_ := getg() casgstatus(_g_.m.curg, _Grunning, _Gwaiting) _g_.waitreason = waitReasonDumpingHeap diff --git a/src/runtime/lockrank_off.go b/src/runtime/lockrank_off.go index 40edf882ee..7dcd8f5fe9 100644 --- a/src/runtime/lockrank_off.go +++ b/src/runtime/lockrank_off.go @@ -46,3 +46,19 @@ func assertLockHeld(l *mutex) { //go:nosplit func assertRankHeld(r lockRank) { } + +//go:nosplit +func worldStopped() { +} + +//go:nosplit +func worldStarted() { +} + +//go:nosplit +func assertWorldStopped() { +} + +//go:nosplit +func assertWorldStoppedOrLockHeld(l *mutex) { +} diff --git a/src/runtime/lockrank_on.go b/src/runtime/lockrank_on.go index db7ff23a58..c25b3a4656 100644 --- a/src/runtime/lockrank_on.go +++ b/src/runtime/lockrank_on.go @@ -7,9 +7,14 @@ package runtime import ( + "runtime/internal/atomic" "unsafe" ) +// worldIsStopped is accessed atomically to track world-stops. 1 == world +// stopped. +var worldIsStopped uint32 + // lockRankStruct is embedded in mutex type lockRankStruct struct { // static lock ranking of the lock @@ -284,3 +289,77 @@ func assertRankHeld(r lockRank) { throw("not holding required lock!") }) } + +// worldStopped notes that the world is stopped. +// +// Caller must hold worldsema. +// +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func worldStopped() { + if stopped := atomic.Xadd(&worldIsStopped, 1); stopped != 1 { + print("world stop count=", stopped, "\n") + throw("recursive world stop") + } +} + +// worldStarted that the world is starting. +// +// Caller must hold worldsema. +// +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func worldStarted() { + if stopped := atomic.Xadd(&worldIsStopped, -1); stopped != 0 { + print("world stop count=", stopped, "\n") + throw("released non-stopped world stop") + } +} + +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func checkWorldStopped() bool { + stopped := atomic.Load(&worldIsStopped) + if stopped > 1 { + print("inconsistent world stop count=", stopped, "\n") + throw("inconsistent world stop count") + } + + return stopped == 1 +} + +// assertWorldStopped throws if the world is not stopped. It does not check +// which M stopped the world. +// +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func assertWorldStopped() { + if checkWorldStopped() { + return + } + + throw("world not stopped") +} + +// assertWorldStoppedOrLockHeld throws if the world is not stopped and the +// passed lock is not held. +// +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func assertWorldStoppedOrLockHeld(l *mutex) { + if checkWorldStopped() { + return + } + + gp := getg() + systemstack(func() { + held := checkLockHeld(gp, l) + if !held { + printlock() + print("caller requires world stop or lock ", l, " (rank ", l.rank.String(), "), holding:\n") + println("") + printHeldLocks(gp) + throw("no world stop or required lock!") + } + }) +} diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go index c0b028d715..ba80ac1bdf 100644 --- a/src/runtime/mcheckmark.go +++ b/src/runtime/mcheckmark.go @@ -34,6 +34,8 @@ var useCheckmark = false // // The world must be stopped. func startCheckmarks() { + assertWorldStopped() + // Clear all checkmarks. for _, ai := range mheap_.allArenas { arena := mheap_.arenas[ai.l1()][ai.l2()] diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 9d2682f03c..fb3c149942 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -2164,6 +2164,8 @@ func gcMark(start_time int64) { // //go:systemstack func gcSweep(mode gcMode) { + assertWorldStopped() + if gcphase != _GCoff { throw("gcSweep being done but phase is not GCoff") } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index c71c0e58d3..5a24cdac88 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -54,6 +54,8 @@ const ( // // The world must be stopped. func gcMarkRootPrepare() { + assertWorldStopped() + work.nFlushCacheRoots = 0 // Compute how many data and BSS root blocks there are. @@ -1535,6 +1537,8 @@ func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) { // // The world must be stopped. func gcMarkTinyAllocs() { + assertWorldStopped() + for _, p := range allp { c := p.mcache if c == nil || c.tiny == 0 { diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 9b77ce635c..8391435630 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -123,6 +123,8 @@ func (h *mheap) nextSpanForSweep() *mspan { // //go:nowritebarrier func finishsweep_m() { + assertWorldStopped() + // Sweeping must be complete before marking commences, so // sweep any unswept spans. If this is a concurrent GC, there // shouldn't be any spans left to sweep, so this should finish diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index e0a417d213..3829355d7b 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -601,6 +601,8 @@ func readGCStats_m(pauses *[]uint64) { // //go:nowritebarrier func updatememstats() { + assertWorldStopped() + // Flush mcaches to mcentral before doing anything else. // // Flushing to the mcentral may in general cause stats to @@ -706,6 +708,8 @@ func updatememstats() { // //go:nowritebarrier func flushmcache(i int) { + assertWorldStopped() + p := allp[i] c := p.mcache if c == nil { @@ -721,6 +725,8 @@ func flushmcache(i int) { // //go:nowritebarrier func flushallmcaches() { + assertWorldStopped() + for i := 0; i < int(gomaxprocs); i++ { flushmcache(i) } @@ -876,10 +882,10 @@ func (m *consistentHeapStats) release(c *mcache) { // unsafeRead aggregates the delta for this shard into out. // // Unsafe because it does so without any synchronization. The -// only safe time to call this is if the world is stopped or -// we're freezing the world or going down anyway (and we just -// want _some_ estimate). +// world must be stopped. func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) { + assertWorldStopped() + for i := range m.stats { out.merge(&m.stats[i]) } @@ -890,6 +896,8 @@ func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) { // Unsafe because the world must be stopped and values should // be donated elsewhere before clearing. func (m *consistentHeapStats) unsafeClear() { + assertWorldStopped() + for i := range m.stats { m.stats[i] = heapStatsDelta{} } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 939757f3a7..82284e6cd6 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -587,6 +587,9 @@ func schedinit() { sched.maxmcount = 10000 + // The world starts stopped. + worldStopped() + moduledataverify() stackinit() mallocinit() @@ -617,6 +620,9 @@ func schedinit() { } unlock(&sched.lock) + // World is effectively started now, as P's can run. + worldStarted() + // For cgocheck > 1, we turn on the write barrier at all times // and check all pointer writes. We can't do this until after // procresize because the write barrier needs a P. @@ -1082,9 +1088,13 @@ func stopTheWorldWithSema() { if bad != "" { throw(bad) } + + worldStopped() } func startTheWorldWithSema(emitTraceEvent bool) int64 { + assertWorldStopped() + mp := acquirem() // disable preemption because it can be holding p in a local var if netpollinited() { list := netpoll(0) // non-blocking @@ -1105,6 +1115,8 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 { } unlock(&sched.lock) + worldStarted() + for p1 != nil { p := p1 p1 = p1.link.ptr() @@ -4539,6 +4551,7 @@ func (pp *p) init(id int32) { // sched.lock must be held and the world must be stopped. func (pp *p) destroy() { assertLockHeld(&sched.lock) + assertWorldStopped() // Move all runnable goroutines to the global queue for pp.runqhead != pp.runqtail { @@ -4629,6 +4642,7 @@ func (pp *p) destroy() { // Returns list of Ps with local work, they need to be scheduled by the caller. func procresize(nprocs int32) *p { assertLockHeld(&sched.lock) + assertWorldStopped() old := gomaxprocs if old < 0 || nprocs <= 0 { -- cgit v1.3