// ReadMemStatsSlow returns both the runtime-computed MemStats and
// MemStats accumulated by scanning the heap. It exists so tests can
// cross-check the O(GOMAXPROCS) fast path in ReadMemStats against a
// ground-truth walk of every span.
func ReadMemStatsSlow() (base, slow MemStats) {
	stopTheWorld("ReadMemStatsSlow")

	// Run on the system stack to avoid stack growth allocation.
	systemstack(func() {
		// Make sure stats don't change while we read and recompute
		// them; mallocing is checked by the allocator.
		getg().m.mallocing++

		readmemstats_m(&base)

		// Initialize slow from base and zero the fields we're
		// recomputing.
		slow = base
		slow.Alloc = 0
		slow.TotalAlloc = 0
		slow.Mallocs = 0
		slow.Frees = 0
		// Per-size-class tallies, merged into slow.BySize at the end.
		var bySize [_NumSizeClasses]struct {
			Mallocs, Frees uint64
		}

		// Add up current (live) allocations in spans.
		for _, s := range mheap_.allspans {
			if s.state != mSpanInUse {
				continue
			}
			if s.sizeclass == 0 {
				// Size class 0 is a large object: one object per span,
				// occupying the span's whole elemsize.
				slow.Mallocs++
				slow.Alloc += uint64(s.elemsize)
			} else {
				// Small objects: allocCount live objects of elemsize each.
				slow.Mallocs += uint64(s.allocCount)
				slow.Alloc += uint64(s.allocCount) * uint64(s.elemsize)
				bySize[s.sizeclass].Mallocs += uint64(s.allocCount)
			}
		}

		// Add in frees. readmemstats_m flushed the cached stats, so
		// these are up-to-date.
		var smallFree uint64
		slow.Frees = mheap_.nlargefree
		for i := range mheap_.nsmallfree {
			slow.Frees += mheap_.nsmallfree[i]
			bySize[i].Frees = mheap_.nsmallfree[i]
			// A freed object was once a malloc, so per-class Mallocs
			// is live objects (counted above) plus frees.
			bySize[i].Mallocs += mheap_.nsmallfree[i]
			smallFree += mheap_.nsmallfree[i] * uint64(class_to_size[i])
		}
		slow.Frees += memstats.tinyallocs
		// MemStats.Mallocs is cumulative: everything still live plus
		// everything already freed.
		slow.Mallocs += slow.Frees

		slow.TotalAlloc = slow.Alloc + mheap_.largefree + smallFree

		for i := range slow.BySize {
			slow.BySize[i].Mallocs = bySize[i].Mallocs
			slow.BySize[i].Frees = bySize[i].Frees
		}

		getg().m.mallocing--
	})

	startTheWorld()
	return
}