From 34835df04891a1d54394888b763af88f9476101d Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 9 Sep 2020 16:52:18 +0000 Subject: runtime: fix ReadMemStatsSlow's and CheckScavengedBits' chunk iteration Both ReadMemStatsSlow and CheckScavengedBits iterate over the page allocator's chunks but don't actually check if they exist. During the development process the chunks index became sparse, so now this was a possibility. If the runtime tests' heap is sparse we might end up segfaulting in either one of these functions, though this will generally be very rare. The pattern here to return nil for a nonexistent chunk is also useful elsewhere, so this change introduces tryChunkOf which won't throw, but might return nil. It also updates the documentation of chunkOf. Fixes #41296. Change-Id: Id5ae0ca3234480de1724fdf2e3677eeedcf76fa0 Reviewed-on: https://go-review.googlesource.com/c/go/+/253777 Run-TryBot: Michael Knyszek Reviewed-by: Keith Randall TryBot-Result: Gobot Gobot --- src/runtime/export_test.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 3307000c51..929bb35db6 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -358,7 +358,11 @@ func ReadMemStatsSlow() (base, slow MemStats) { } for i := mheap_.pages.start; i < mheap_.pages.end; i++ { - pg := mheap_.pages.chunkOf(i).scavenged.popcntRange(0, pallocChunkPages) + chunk := mheap_.pages.tryChunkOf(i) + if chunk == nil { + continue + } + pg := chunk.scavenged.popcntRange(0, pallocChunkPages) slow.HeapReleased += uint64(pg) * pageSize } for _, p := range allp { @@ -756,11 +760,7 @@ func (p *PageAlloc) InUse() []AddrRange { // Returns nil if the PallocData's L2 is missing. func (p *PageAlloc) PallocData(i ChunkIdx) *PallocData { ci := chunkIdx(i) - l2 := (*pageAlloc)(p).chunks[ci.l1()] - if l2 == nil { - return nil - } - return (*PallocData)(&l2[ci.l2()]) + return (*PallocData)((*pageAlloc)(p).tryChunkOf(ci)) } // AddrRange represents a range over addresses. @@ -900,7 +900,10 @@ func CheckScavengedBitsCleared(mismatches []BitsMismatch) (n int, ok bool) { lock(&mheap_.lock) chunkLoop: for i := mheap_.pages.start; i < mheap_.pages.end; i++ { - chunk := mheap_.pages.chunkOf(i) + chunk := mheap_.pages.tryChunkOf(i) + if chunk == nil { + continue + } for j := 0; j < pallocChunkPages/64; j++ { // Run over each 64-bit bitmap section and ensure // scavenged is being cleared properly on allocation. -- cgit v1.3 From 2ae2a94857cb17a98a86a8332d6f76863982bf59 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 16 Sep 2020 16:22:28 +0000 Subject: runtime: fix leak and locking in BenchmarkMSpanCountAlloc CL 249917 made the mspan in MSpanCountAlloc no longer stack-allocated (for good reason), but then allocated an mspan on each call and did not free it, resulting in a leak. That allocation was also not protected by the heap lock, which could lead to data corruption of mheap fields and the spanalloc. To fix this, export some functions to allocate/free dummy mspans from spanalloc (with proper locking) and allocate just one up-front for the benchmark, freeing it at the end. Then, update MSpanCountAlloc to accept a dummy mspan. Note that we need to allocate the dummy mspan up-front otherwise we measure things like heap locking and fixalloc performance instead of what we actually want to measure: how fast we can do a popcount on the mark bits. Fixes #41391. 
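To make the measured kernel concrete: once the dummy span is hoisted
out of the timed loop, each iteration reduces to a population count
over the span's mark bits. A minimal sketch of that kernel, assuming
countAlloc is essentially a popcount (the name countAllocSketch and
the byte-at-a-time loop are illustrative only; judging by the
multiple-of-8 requirement in the test below, the real
mspan.countAlloc walks the bitmap in 64-bit words):

	// countAllocSketch counts the set bits in a span's gcmarkBits,
	// i.e. the number of live objects. Written with math/bits for
	// use outside the runtime; the runtime uses internal helpers.
	func countAllocSketch(marks []byte) int {
		n := 0
		for _, b := range marks {
			n += bits.OnesCount8(b)
		}
		return n
	}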
Change-Id: If6629a6ec1ece639c7fb78532045837a8c872c04 Reviewed-on: https://go-review.googlesource.com/c/go/+/255297 Run-TryBot: Michael Knyszek Reviewed-by: Keith Randall TryBot-Result: Go Bot Trust: Michael Knyszek --- src/runtime/export_test.go | 28 +++++++++++++++++++++++++--- src/runtime/gc_test.go | 6 +++++- 2 files changed, 30 insertions(+), 4 deletions(-) (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 929bb35db6..e3d6441c18 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -983,9 +983,31 @@ func MapHashCheck(m interface{}, k interface{}) (uintptr, uintptr) { return x, y } -func MSpanCountAlloc(bits []byte) int { - s := (*mspan)(mheap_.spanalloc.alloc()) +// mspan wrapper for testing. +//go:notinheap +type MSpan mspan + +// Allocate an mspan for testing. +func AllocMSpan() *MSpan { + var s *mspan + systemstack(func() { + s = (*mspan)(mheap_.spanalloc.alloc()) + }) + return (*MSpan)(s) +} + +// Free an allocated mspan. +func FreeMSpan(s *MSpan) { + systemstack(func() { + mheap_.spanalloc.free(unsafe.Pointer(s)) + }) +} + +func MSpanCountAlloc(ms *MSpan, bits []byte) int { + s := (*mspan)(ms) s.nelems = uintptr(len(bits) * 8) s.gcmarkBits = (*gcBits)(unsafe.Pointer(&bits[0])) - return s.countAlloc() + result := s.countAlloc() + s.gcmarkBits = nil + return result } diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index c5c8a4cecf..9edebdada6 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -763,6 +763,10 @@ func BenchmarkScanStackNoLocals(b *testing.B) { } func BenchmarkMSpanCountAlloc(b *testing.B) { + // Allocate one dummy mspan for the whole benchmark. + s := runtime.AllocMSpan() + defer runtime.FreeMSpan(s) + // n is the number of bytes to benchmark against. // n must always be a multiple of 8, since gcBits is // always rounded up 8 bytes. @@ -774,7 +778,7 @@ func BenchmarkMSpanCountAlloc(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - runtime.MSpanCountAlloc(bits) + runtime.MSpanCountAlloc(s, bits) } }) } -- cgit v1.3 From 10dfb1dd3d1d26122cf18f29468ec17eb7222c3f Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 16 Sep 2020 17:08:55 +0000 Subject: runtime: actually fix locking in BenchmarkMSpanCountAlloc I just submitted CL 255297 which mostly fixed this problem, but totally forgot to actually acquire/release the heap lock. Oops. Updates #41391. Change-Id: I45b42f20a9fc765c4de52476db3654d4bfe9feb3 Reviewed-on: https://go-review.googlesource.com/c/go/+/255298 Trust: Michael Knyszek Run-TryBot: Michael Knyszek Reviewed-by: Keith Randall TryBot-Result: Go Bot --- src/runtime/export_test.go | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index e3d6441c18..f2fa11dc98 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -991,7 +991,9 @@ type MSpan mspan func AllocMSpan() *MSpan { var s *mspan systemstack(func() { + lock(&mheap_.lock) s = (*mspan)(mheap_.spanalloc.alloc()) + unlock(&mheap_.lock) }) return (*MSpan)(s) } @@ -999,7 +1001,9 @@ func AllocMSpan() *MSpan { // Free an allocated mspan. 
func FreeMSpan(s *MSpan) { systemstack(func() { + lock(&mheap_.lock) mheap_.spanalloc.free(unsafe.Pointer(s)) + unlock(&mheap_.lock) }) } -- cgit v1.3 From b4a06b20897fe7ea3be715cb51040a2ccc52c15b Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 14 Jul 2020 20:27:27 +0000 Subject: runtime: define the AddrRange used for testing in terms of addrRange Currently the AddrRange used for testing is defined separately from addrRange in the runtime, making it difficult to test it as well as addrRanges. Redefine AddrRange in terms of addrRange instead. For #40191. Change-Id: I3aa5b8df3e4c9a3c494b46ab802dd574b2488141 Reviewed-on: https://go-review.googlesource.com/c/go/+/242677 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt Reviewed-by: Austin Clements --- src/runtime/export_test.go | 30 ++++++++++--- src/runtime/mpagealloc_test.go | 98 +++++++++++++++++++++--------------------- 2 files changed, 72 insertions(+), 56 deletions(-) (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index f2fa11dc98..25b251f4ba 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -749,10 +749,7 @@ func (p *PageAlloc) Scavenge(nbytes uintptr, mayUnlock bool) (r uintptr) { func (p *PageAlloc) InUse() []AddrRange { ranges := make([]AddrRange, 0, len(p.inUse.ranges)) for _, r := range p.inUse.ranges { - ranges = append(ranges, AddrRange{ - Base: r.base.addr(), - Limit: r.limit.addr(), - }) + ranges = append(ranges, AddrRange{r}) } return ranges } @@ -763,10 +760,29 @@ func (p *PageAlloc) PallocData(i ChunkIdx) *PallocData { return (*PallocData)((*pageAlloc)(p).tryChunkOf(ci)) } -// AddrRange represents a range over addresses. -// Specifically, it represents the range [Base, Limit). +// AddrRange is a wrapper around addrRange for testing. type AddrRange struct { - Base, Limit uintptr + addrRange +} + +// MakeAddrRange creates a new address range. +func MakeAddrRange(base, limit uintptr) AddrRange { + return AddrRange{makeAddrRange(base, limit)} +} + +// Base returns the virtual base address of the address range. +func (a AddrRange) Base() uintptr { + return a.addrRange.base.addr() +} + +// Base returns the virtual address of the limit of the address range. +func (a AddrRange) Limit() uintptr { + return a.addrRange.limit.addr() +} + +// Equals returns true if the two address ranges are exactly equal. +func (a AddrRange) Equals(b AddrRange) bool { + return a == b } // BitRange represents a range over a bitmap. 
diff --git a/src/runtime/mpagealloc_test.go b/src/runtime/mpagealloc_test.go index 65ba71d459..5d979fa95b 100644 --- a/src/runtime/mpagealloc_test.go +++ b/src/runtime/mpagealloc_test.go @@ -54,7 +54,7 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)), }, }, "Contiguous2": { @@ -63,7 +63,7 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 1, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)), }, }, "Contiguous5": { @@ -75,7 +75,7 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 4, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+5, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+5, 0)), }, }, "Discontiguous": { @@ -85,9 +85,9 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 4, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)}, - {PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)}, - {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)), }, }, "Mixed": { @@ -98,8 +98,8 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 4, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+3, 0)}, - {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+3, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)), }, }, "WildlyDiscontiguous": { @@ -110,9 +110,9 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 0x21, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)}, - {PageBase(BaseChunkIdx+0x10, 0), PageBase(BaseChunkIdx+0x11, 0)}, - {PageBase(BaseChunkIdx+0x21, 0), PageBase(BaseChunkIdx+0x22, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+0x10, 0), PageBase(BaseChunkIdx+0x11, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+0x21, 0), PageBase(BaseChunkIdx+0x22, 0)), }, }, "ManyDiscontiguous": { @@ -129,39 +129,39 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 64, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)}, - {PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)}, - {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)}, - {PageBase(BaseChunkIdx+6, 0), PageBase(BaseChunkIdx+7, 0)}, - {PageBase(BaseChunkIdx+8, 0), PageBase(BaseChunkIdx+9, 0)}, - {PageBase(BaseChunkIdx+10, 0), PageBase(BaseChunkIdx+11, 0)}, - {PageBase(BaseChunkIdx+12, 0), PageBase(BaseChunkIdx+13, 0)}, - {PageBase(BaseChunkIdx+14, 0), PageBase(BaseChunkIdx+15, 0)}, - {PageBase(BaseChunkIdx+16, 0), PageBase(BaseChunkIdx+17, 0)}, - {PageBase(BaseChunkIdx+18, 0), PageBase(BaseChunkIdx+19, 0)}, - {PageBase(BaseChunkIdx+20, 0), PageBase(BaseChunkIdx+21, 0)}, - {PageBase(BaseChunkIdx+22, 0), PageBase(BaseChunkIdx+23, 0)}, - {PageBase(BaseChunkIdx+24, 0), PageBase(BaseChunkIdx+25, 0)}, - {PageBase(BaseChunkIdx+26, 0), PageBase(BaseChunkIdx+27, 0)}, - {PageBase(BaseChunkIdx+28, 0), PageBase(BaseChunkIdx+29, 0)}, - {PageBase(BaseChunkIdx+30, 0), PageBase(BaseChunkIdx+31, 0)}, - 
{PageBase(BaseChunkIdx+32, 0), PageBase(BaseChunkIdx+33, 0)}, - {PageBase(BaseChunkIdx+34, 0), PageBase(BaseChunkIdx+35, 0)}, - {PageBase(BaseChunkIdx+36, 0), PageBase(BaseChunkIdx+37, 0)}, - {PageBase(BaseChunkIdx+38, 0), PageBase(BaseChunkIdx+39, 0)}, - {PageBase(BaseChunkIdx+40, 0), PageBase(BaseChunkIdx+41, 0)}, - {PageBase(BaseChunkIdx+42, 0), PageBase(BaseChunkIdx+43, 0)}, - {PageBase(BaseChunkIdx+44, 0), PageBase(BaseChunkIdx+45, 0)}, - {PageBase(BaseChunkIdx+46, 0), PageBase(BaseChunkIdx+47, 0)}, - {PageBase(BaseChunkIdx+48, 0), PageBase(BaseChunkIdx+49, 0)}, - {PageBase(BaseChunkIdx+50, 0), PageBase(BaseChunkIdx+51, 0)}, - {PageBase(BaseChunkIdx+52, 0), PageBase(BaseChunkIdx+53, 0)}, - {PageBase(BaseChunkIdx+54, 0), PageBase(BaseChunkIdx+55, 0)}, - {PageBase(BaseChunkIdx+56, 0), PageBase(BaseChunkIdx+57, 0)}, - {PageBase(BaseChunkIdx+58, 0), PageBase(BaseChunkIdx+59, 0)}, - {PageBase(BaseChunkIdx+60, 0), PageBase(BaseChunkIdx+61, 0)}, - {PageBase(BaseChunkIdx+62, 0), PageBase(BaseChunkIdx+63, 0)}, - {PageBase(BaseChunkIdx+64, 0), PageBase(BaseChunkIdx+65, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+6, 0), PageBase(BaseChunkIdx+7, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+8, 0), PageBase(BaseChunkIdx+9, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+10, 0), PageBase(BaseChunkIdx+11, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+12, 0), PageBase(BaseChunkIdx+13, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+14, 0), PageBase(BaseChunkIdx+15, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+16, 0), PageBase(BaseChunkIdx+17, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+18, 0), PageBase(BaseChunkIdx+19, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+20, 0), PageBase(BaseChunkIdx+21, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+22, 0), PageBase(BaseChunkIdx+23, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+24, 0), PageBase(BaseChunkIdx+25, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+26, 0), PageBase(BaseChunkIdx+27, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+28, 0), PageBase(BaseChunkIdx+29, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+30, 0), PageBase(BaseChunkIdx+31, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+32, 0), PageBase(BaseChunkIdx+33, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+34, 0), PageBase(BaseChunkIdx+35, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+36, 0), PageBase(BaseChunkIdx+37, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+38, 0), PageBase(BaseChunkIdx+39, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+40, 0), PageBase(BaseChunkIdx+41, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+42, 0), PageBase(BaseChunkIdx+43, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+44, 0), PageBase(BaseChunkIdx+45, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+46, 0), PageBase(BaseChunkIdx+47, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+48, 0), PageBase(BaseChunkIdx+49, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+50, 0), PageBase(BaseChunkIdx+51, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+52, 0), PageBase(BaseChunkIdx+53, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+54, 0), PageBase(BaseChunkIdx+55, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+56, 0), PageBase(BaseChunkIdx+57, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+58, 0), PageBase(BaseChunkIdx+59, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+60, 0), PageBase(BaseChunkIdx+61, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+62, 0), PageBase(BaseChunkIdx+63, 0)), + 
MakeAddrRange(PageBase(BaseChunkIdx+64, 0), PageBase(BaseChunkIdx+65, 0)), }, }, } @@ -172,8 +172,8 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 0x100000, // constant translates to O(TiB) }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)}, - {PageBase(BaseChunkIdx+0x100000, 0), PageBase(BaseChunkIdx+0x100001, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+0x100000, 0), PageBase(BaseChunkIdx+0x100001, 0)), }, } } @@ -197,7 +197,7 @@ func TestPageAllocGrow(t *testing.T) { t.Fail() } else { for i := range want { - if want[i] != got[i] { + if !want[i].Equals(got[i]) { t.Fail() break } @@ -207,11 +207,11 @@ func TestPageAllocGrow(t *testing.T) { t.Logf("found inUse mismatch") t.Logf("got:") for i, r := range got { - t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit) + t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base(), r.Limit()) } t.Logf("want:") for i, r := range want { - t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit) + t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base(), r.Limit()) } } }) -- cgit v1.3 From e01a1c01f830e2398b773b803dce3238b1107ce9 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 14 Jul 2020 21:39:52 +0000 Subject: runtime: add tests for addrRanges.findSucc This change adds a test suite for addrRanges.findSucc so we can change the implementation more safely. For #40191. Change-Id: I14a834b6d54836cbc676eb0edb292ba6176705cc Reviewed-on: https://go-review.googlesource.com/c/go/+/242678 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Austin Clements Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 24 +++++++ src/runtime/mranges_test.go | 172 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 src/runtime/mranges_test.go (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 25b251f4ba..605bcb2294 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -785,6 +785,30 @@ func (a AddrRange) Equals(b AddrRange) bool { return a == b } +// AddrRanges is a wrapper around addrRanges for testing. +type AddrRanges struct { + addrRanges +} + +// MakeAddrRanges creates a new addrRanges populated with +// the ranges in a. +func MakeAddrRanges(a ...AddrRange) AddrRanges { + // Methods that manipulate the backing store of addrRanges.ranges should + // not be used on the result from this function (e.g. add) since they may + // trigger reallocation. + ranges := make([]addrRange, 0, len(a)) + for _, r := range a { + ranges = append(ranges, r.addrRange) + } + return AddrRanges{addrRanges{ranges: ranges, sysStat: new(uint64)}} +} + +// FindSucc returns the successor to base. See addrRanges.findSucc +// for more details. +func (a *AddrRanges) FindSucc(base uintptr) int { + return a.findSucc(base) +} + // BitRange represents a range over a bitmap. type BitRange struct { I, N uint // bit index and length in bits diff --git a/src/runtime/mranges_test.go b/src/runtime/mranges_test.go new file mode 100644 index 0000000000..3a9023adfa --- /dev/null +++ b/src/runtime/mranges_test.go @@ -0,0 +1,172 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + . 
"runtime" + "testing" +) + +func TestAddrRangesFindSucc(t *testing.T) { + var large []AddrRange + for i := 0; i < 100; i++ { + large = append(large, MakeAddrRange(5+uintptr(i)*5, 5+uintptr(i)*5+3)) + } + + type testt struct { + name string + base uintptr + expect int + ranges []AddrRange + } + tests := []testt{ + { + name: "Empty", + base: 12, + expect: 0, + ranges: []AddrRange{}, + }, + { + name: "OneBefore", + base: 12, + expect: 0, + ranges: []AddrRange{ + MakeAddrRange(14, 16), + }, + }, + { + name: "OneWithin", + base: 14, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(14, 16), + }, + }, + { + name: "OneAfterLimit", + base: 16, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(14, 16), + }, + }, + { + name: "OneAfter", + base: 17, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(14, 16), + }, + }, + { + name: "ThreeBefore", + base: 3, + expect: 0, + ranges: []AddrRange{ + MakeAddrRange(6, 10), + MakeAddrRange(12, 16), + MakeAddrRange(19, 22), + }, + }, + { + name: "ThreeAfter", + base: 24, + expect: 3, + ranges: []AddrRange{ + MakeAddrRange(6, 10), + MakeAddrRange(12, 16), + MakeAddrRange(19, 22), + }, + }, + { + name: "ThreeBetween", + base: 11, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(6, 10), + MakeAddrRange(12, 16), + MakeAddrRange(19, 22), + }, + }, + { + name: "ThreeWithin", + base: 9, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(6, 10), + MakeAddrRange(12, 16), + MakeAddrRange(19, 22), + }, + }, + { + name: "Zero", + base: 0, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(0, 10), + }, + }, + { + name: "Max", + base: ^uintptr(0), + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(^uintptr(0)-5, ^uintptr(0)), + }, + }, + { + name: "LargeBefore", + base: 2, + expect: 0, + ranges: large, + }, + { + name: "LargeAfter", + base: 5 + uintptr(len(large))*5 + 30, + expect: len(large), + ranges: large, + }, + { + name: "LargeBetweenLow", + base: 14, + expect: 2, + ranges: large, + }, + { + name: "LargeBetweenHigh", + base: 249, + expect: 49, + ranges: large, + }, + { + name: "LargeWithinLow", + base: 25, + expect: 5, + ranges: large, + }, + { + name: "LargeWithinHigh", + base: 396, + expect: 79, + ranges: large, + }, + { + name: "LargeWithinMiddle", + base: 250, + expect: 50, + ranges: large, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + a := MakeAddrRanges(test.ranges...) + i := a.FindSucc(test.base) + if i != test.expect { + t.Fatalf("expected %d, got %d", test.expect, i) + } + }) + } +} -- cgit v1.3 From 64dc25b2db5d5be55f3f2fde3daac6c8a2873235 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 14 Jul 2020 21:41:12 +0000 Subject: runtime: add tests for addrRanges.add Change-Id: I249deb482df74068b0538e9d773b9a87bc5a6df3 Reviewed-on: https://go-review.googlesource.com/c/go/+/242681 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Austin Clements Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 62 +++++++++++++++++++++++++- src/runtime/mranges_test.go | 103 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 2 deletions(-) (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 605bcb2294..e65b7b8ea7 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -785,22 +785,64 @@ func (a AddrRange) Equals(b AddrRange) bool { return a == b } +// Size returns the size in bytes of the address range. 
+func (a AddrRange) Size() uintptr { + return a.addrRange.size() +} + // AddrRanges is a wrapper around addrRanges for testing. type AddrRanges struct { addrRanges + mutable bool +} + +// NewAddrRanges creates a new empty addrRanges. +// +// Note that this initializes addrRanges just like in the +// runtime, so its memory is persistentalloc'd. Call this +// function sparingly since the memory it allocates is +// leaked. +// +// This AddrRanges is mutable, so we can test methods like +// Add. +func NewAddrRanges() AddrRanges { + r := addrRanges{} + r.init(new(uint64)) + return AddrRanges{r, true} } // MakeAddrRanges creates a new addrRanges populated with // the ranges in a. +// +// The returned AddrRanges is immutable, so methods like +// Add will fail. func MakeAddrRanges(a ...AddrRange) AddrRanges { // Methods that manipulate the backing store of addrRanges.ranges should // not be used on the result from this function (e.g. add) since they may - // trigger reallocation. + // trigger reallocation. That would normally be fine, except the new + // backing store won't come from the heap, but from persistentalloc, so + // we'll leak some memory implicitly. ranges := make([]addrRange, 0, len(a)) + total := uintptr(0) for _, r := range a { ranges = append(ranges, r.addrRange) + total += r.Size() + } + return AddrRanges{addrRanges{ + ranges: ranges, + totalBytes: total, + sysStat: new(uint64), + }, false} +} + +// Ranges returns a copy of the ranges described by the +// addrRanges. +func (a *AddrRanges) Ranges() []AddrRange { + result := make([]AddrRange, 0, len(a.addrRanges.ranges)) + for _, r := range a.addrRanges.ranges { + result = append(result, AddrRange{r}) } - return AddrRanges{addrRanges{ranges: ranges, sysStat: new(uint64)}} + return result } // FindSucc returns the successor to base. See addrRanges.findSucc @@ -809,6 +851,22 @@ func (a *AddrRanges) FindSucc(base uintptr) int { return a.findSucc(base) } +// Add adds a new AddrRange to the AddrRanges. +// +// The AddrRange must be mutable (i.e. created by NewAddrRanges), +// otherwise this method will throw. +func (a *AddrRanges) Add(r AddrRange) { + if !a.mutable { + throw("attempt to mutate immutable AddrRanges") + } + a.add(r.addrRange) +} + +// TotalBytes returns the totalBytes field of the addrRanges. +func (a *AddrRanges) TotalBytes() uintptr { + return a.addrRanges.totalBytes +} + // BitRange represents a range over a bitmap. type BitRange struct { I, N uint // bit index and length in bits diff --git a/src/runtime/mranges_test.go b/src/runtime/mranges_test.go index 3a9023adfa..ed439c56c2 100644 --- a/src/runtime/mranges_test.go +++ b/src/runtime/mranges_test.go @@ -9,6 +9,109 @@ import ( "testing" ) +func validateAddrRanges(t *testing.T, a *AddrRanges, want ...AddrRange) { + ranges := a.Ranges() + if len(ranges) != len(want) { + t.Errorf("want %v, got %v", want, ranges) + t.Fatal("different lengths") + } + gotTotalBytes := uintptr(0) + wantTotalBytes := uintptr(0) + for i := range ranges { + gotTotalBytes += ranges[i].Size() + wantTotalBytes += want[i].Size() + if ranges[i].Base() >= ranges[i].Limit() { + t.Error("empty range found") + } + // Ensure this is equivalent to what we want. + if !ranges[i].Equals(want[i]) { + t.Errorf("range %d: got [0x%x, 0x%x), want [0x%x, 0x%x)", i, + ranges[i].Base(), ranges[i].Limit(), + want[i].Base(), want[i].Limit(), + ) + } + if i != 0 { + // Ensure the ranges are sorted. 
+ if ranges[i-1].Base() >= ranges[i].Base() { + t.Errorf("ranges %d and %d are out of sorted order", i-1, i) + } + // Check for a failure to coalesce. + if ranges[i-1].Limit() == ranges[i].Base() { + t.Errorf("ranges %d and %d should have coalesced", i-1, i) + } + // Check if any ranges overlap. Because the ranges are sorted + // by base, it's sufficient to just check neighbors. + if ranges[i-1].Limit() > ranges[i].Base() { + t.Errorf("ranges %d and %d overlap", i-1, i) + } + } + } + if wantTotalBytes != gotTotalBytes { + t.Errorf("expected %d total bytes, got %d", wantTotalBytes, gotTotalBytes) + } + if b := a.TotalBytes(); b != gotTotalBytes { + t.Errorf("inconsistent total bytes: want %d, got %d", gotTotalBytes, b) + } + if t.Failed() { + t.Errorf("addrRanges: %v", ranges) + t.Fatal("detected bad addrRanges") + } +} + +func TestAddrRangesAdd(t *testing.T) { + a := NewAddrRanges() + + // First range. + a.Add(MakeAddrRange(512, 1024)) + validateAddrRanges(t, &a, + MakeAddrRange(512, 1024), + ) + + // Coalesce up. + a.Add(MakeAddrRange(1024, 2048)) + validateAddrRanges(t, &a, + MakeAddrRange(512, 2048), + ) + + // Add new independent range. + a.Add(MakeAddrRange(4096, 8192)) + validateAddrRanges(t, &a, + MakeAddrRange(512, 2048), + MakeAddrRange(4096, 8192), + ) + + // Coalesce down. + a.Add(MakeAddrRange(3776, 4096)) + validateAddrRanges(t, &a, + MakeAddrRange(512, 2048), + MakeAddrRange(3776, 8192), + ) + + // Coalesce up and down. + a.Add(MakeAddrRange(2048, 3776)) + validateAddrRanges(t, &a, + MakeAddrRange(512, 8192), + ) + + // Push a bunch of independent ranges to the end to try and force growth. + expectedRanges := []AddrRange{MakeAddrRange(512, 8192)} + for i := uintptr(0); i < 64; i++ { + dRange := MakeAddrRange(8192+(i+1)*2048, 8192+(i+1)*2048+10) + a.Add(dRange) + expectedRanges = append(expectedRanges, dRange) + validateAddrRanges(t, &a, expectedRanges...) + } + + // Push a bunch of independent ranges to the beginning to try and force growth. + var bottomRanges []AddrRange + for i := uintptr(0); i < 63; i++ { + dRange := MakeAddrRange(8+i*8, 8+i*8+4) + a.Add(dRange) + bottomRanges = append(bottomRanges, dRange) + validateAddrRanges(t, &a, append(bottomRanges, expectedRanges...)...) + } +} + func TestAddrRangesFindSucc(t *testing.T) { var large []AddrRange for i := 0; i < 100; i++ { -- cgit v1.3 From 42019613df2d9b6ad39e8ccf80861e75666025a0 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 21:02:05 +0000 Subject: runtime: make distributed/local malloc stats the source-of-truth This change makes it so that various local malloc stats (excluding heap_scan and local_tinyallocs) are no longer written first to mheap fields but are instead accessed directly from each mcache. This change is part of a move toward having stats be distributed, and cleaning up some old code related to the stats. Note that because there's no central source-of-truth, when an mcache dies, it must donate its stats to another mcache. It's always safe to donate to the mcache for the 0th P, so do that. 
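The reading side relies on a simple invariant: only the owning P
writes its mcache stats, so a consistent snapshot can be taken by
summing over all Ps with the world stopped. A sketch of that
pattern, mirroring the ReadMemStatsSlow and updatememstats hunks
below:

	// With the world stopped, no P is mutating its mcache, so a
	// plain (non-atomic) sum over allp yields consistent totals.
	var largeFree, largeFreeCount uint64
	for _, p := range allp {
		c := p.mcache
		if c == nil {
			continue // not every P has an mcache
		}
		largeFree += uint64(c.local_largefree)
		largeFreeCount += uint64(c.local_nlargefree)
	}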
Change-Id: I2556093dbc27357cb9621c9b97671f3c00aa1173 Reviewed-on: https://go-review.googlesource.com/c/go/+/246964 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 26 ++++++++++++++++++-------- src/runtime/mcache.go | 31 +++++++++++++++++++++++++++++-- src/runtime/mheap.go | 7 ++----- src/runtime/mstats.go | 41 +++++++++++++++++++---------------------- src/runtime/proc.go | 2 +- 5 files changed, 69 insertions(+), 38 deletions(-) (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index e65b7b8ea7..d5a90ca65b 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -339,18 +339,28 @@ func ReadMemStatsSlow() (base, slow MemStats) { // Add in frees. readmemstats_m flushed the cached stats, so // these are up-to-date. - var smallFree uint64 - slow.Frees = mheap_.nlargefree - for i := range mheap_.nsmallfree { - slow.Frees += mheap_.nsmallfree[i] - bySize[i].Frees = mheap_.nsmallfree[i] - bySize[i].Mallocs += mheap_.nsmallfree[i] - smallFree += mheap_.nsmallfree[i] * uint64(class_to_size[i]) + var largeFree, smallFree uint64 + for _, p := range allp { + c := p.mcache + if c == nil { + continue + } + // Collect large allocation stats. + largeFree += uint64(c.local_largefree) + slow.Frees += uint64(c.local_nlargefree) + + // Collect per-sizeclass stats. + for i := 0; i < _NumSizeClasses; i++ { + slow.Frees += uint64(c.local_nsmallfree[i]) + bySize[i].Frees += uint64(c.local_nsmallfree[i]) + bySize[i].Mallocs += uint64(c.local_nsmallfree[i]) + smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + } } slow.Frees += memstats.tinyallocs slow.Mallocs += slow.Frees - slow.TotalAlloc = slow.Alloc + mheap_.largefree + smallFree + slow.TotalAlloc = slow.Alloc + largeFree + smallFree for i := range slow.BySize { slow.BySize[i].Mallocs = bySize[i].Mallocs diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index 7a7d33ccae..5baa7b3da8 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -41,7 +41,13 @@ type mcache struct { stackcache [_NumStackOrders]stackfreelist - // Local allocator stats, flushed during GC. + // Allocator stats (source-of-truth). + // Only the P that owns this mcache may write to these + // variables, so it's safe for that P to read non-atomically. + // + // When read with stats from other mcaches and with the world + // stopped, the result will accurately reflect the state of the + // application. local_largefree uintptr // bytes freed for large objects (>maxsmallsize) local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) @@ -97,7 +103,13 @@ func allocmcache() *mcache { return c } -func freemcache(c *mcache) { +// freemcache releases resources associated with this +// mcache and puts the object onto a free list. +// +// In some cases there is no way to simply release +// resources, such as statistics, so donate them to +// a different mcache (the recipient). +func freemcache(c *mcache, recipient *mcache) { systemstack(func() { c.releaseAll() stackcache_clear(c) @@ -109,11 +121,26 @@ func freemcache(c *mcache) { lock(&mheap_.lock) purgecachedstats(c) + // Donate anything else that's left. + c.donate(recipient) mheap_.cachealloc.free(unsafe.Pointer(c)) unlock(&mheap_.lock) }) } +// donate flushes data and resources which have no global +// pool to another mcache. 
+func (c *mcache) donate(d *mcache) { + d.local_largefree += c.local_largefree + c.local_largefree = 0 + d.local_nlargefree += c.local_nlargefree + c.local_nlargefree = 0 + for i := range c.local_nsmallfree { + d.local_nsmallfree[i] += c.local_nsmallfree[i] + c.local_nsmallfree[i] = 0 + } +} + // refill acquires a new span of span class spc for c. This span will // have at least one free object. The current span in c must be full. // diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 124bbacd1d..1b41b204ab 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -129,11 +129,8 @@ type mheap struct { reclaimCredit uintptr // Malloc stats. - largealloc uint64 // bytes allocated for large objects - nlargealloc uint64 // number of large object allocations - largefree uint64 // bytes freed for large objects (>maxsmallsize) - nlargefree uint64 // number of frees for large objects (>maxsmallsize) - nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize) + largealloc uint64 // bytes allocated for large objects + nlargealloc uint64 // number of large object allocations // arenas is the heap arena map. It points to the metadata for // the heap for every arena frame of the entire usable virtual diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 8cc20552fb..d81d2ebe81 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -571,21 +571,27 @@ func updatememstats() { memstats.by_size[i].nmalloc += c.nmalloc totalAlloc += c.nmalloc * uint64(class_to_size[i]) } - // Collect per-sizeclass stats. - for i := 0; i < _NumSizeClasses; i++ { - if i == 0 { - memstats.nmalloc += mheap_.nlargealloc - totalAlloc += mheap_.largealloc - totalFree += mheap_.largefree - memstats.nfree += mheap_.nlargefree + + for _, p := range allp { + c := p.mcache + if c == nil { continue } - - // The mcache stats have been flushed to mheap_. - memstats.nfree += mheap_.nsmallfree[i] - memstats.by_size[i].nfree = mheap_.nsmallfree[i] - smallFree += mheap_.nsmallfree[i] * uint64(class_to_size[i]) + // Collect large allocation stats. + totalFree += uint64(c.local_largefree) + memstats.nfree += uint64(c.local_nlargefree) + + // Collect per-sizeclass stats. + for i := 0; i < _NumSizeClasses; i++ { + memstats.nfree += uint64(c.local_nsmallfree[i]) + memstats.by_size[i].nfree += uint64(c.local_nsmallfree[i]) + smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + } } + // Collect remaining large allocation stats. + memstats.nmalloc += mheap_.nlargealloc + totalAlloc += mheap_.largealloc + totalFree += smallFree memstats.nfree += memstats.tinyallocs @@ -641,20 +647,11 @@ func flushallmcaches() { //go:nosplit func purgecachedstats(c *mcache) { - // Protected by either heap or GC lock. - h := &mheap_ + // Protected by heap lock. atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 - h.largefree += uint64(c.local_largefree) - c.local_largefree = 0 - h.nlargefree += uint64(c.local_nlargefree) - c.local_nlargefree = 0 - for i := 0; i < len(c.local_nsmallfree); i++ { - h.nsmallfree[i] += uint64(c.local_nsmallfree[i]) - c.local_nsmallfree[i] = 0 - } } // Atomically increases a given *system* memory stat. 
We are counting on this diff --git a/src/runtime/proc.go b/src/runtime/proc.go index ebecc92745..4f4cff38aa 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -4550,7 +4550,7 @@ func (pp *p) destroy() { pp.mspancache.len = 0 pp.pcache.flush(&mheap_.pages) }) - freemcache(pp.mcache) + freemcache(pp.mcache, allp[0].mcache) pp.mcache = nil gfpurge(pp) traceProcFree(pp) -- cgit v1.3 From cca3d1e5533cb40beb9ef55bbc332b733adcc6ba Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 22:16:46 +0000 Subject: runtime: don't flush local_tinyallocs This change makes local_tinyallocs work like the rest of the malloc stats and doesn't flush local_tinyallocs, instead making that the source-of-truth. Change-Id: I3e6cb5f1b3d086e432ce7d456895511a48e3617a Reviewed-on: https://go-review.googlesource.com/c/go/+/246967 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 7 +++++-- src/runtime/mcache.go | 8 +++++--- src/runtime/mheap.go | 4 ---- src/runtime/mstats.go | 6 ++++-- 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index d5a90ca65b..d71b180f76 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -339,7 +339,7 @@ func ReadMemStatsSlow() (base, slow MemStats) { // Add in frees. readmemstats_m flushed the cached stats, so // these are up-to-date. - var largeFree, smallFree uint64 + var tinyAllocs, largeFree, smallFree uint64 for _, p := range allp { c := p.mcache if c == nil { @@ -349,6 +349,9 @@ func ReadMemStatsSlow() (base, slow MemStats) { largeFree += uint64(c.local_largefree) slow.Frees += uint64(c.local_nlargefree) + // Collect tiny allocation stats. + tinyAllocs += uint64(c.local_tinyallocs) + // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { slow.Frees += uint64(c.local_nsmallfree[i]) @@ -357,7 +360,7 @@ func ReadMemStatsSlow() (base, slow MemStats) { smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) } } - slow.Frees += memstats.tinyallocs + slow.Frees += tinyAllocs slow.Mallocs += slow.Frees slow.TotalAlloc = slow.Alloc + largeFree + smallFree diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index 4d2ba6dff0..fe603116a2 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -32,9 +32,8 @@ type mcache struct { // tiny is a heap pointer. Since mcache is in non-GC'd memory, // we handle it by clearing it in releaseAll during mark // termination. - tiny uintptr - tinyoffset uintptr - local_tinyallocs uintptr // number of tiny allocs not counted in other stats + tiny uintptr + tinyoffset uintptr // The rest is not accessed on every malloc. @@ -49,6 +48,7 @@ type mcache struct { // When read with stats from other mcaches and with the world // stopped, the result will accurately reflect the state of the // application. + local_tinyallocs uintptr // number of tiny allocs not counted in other stats local_largealloc uintptr // bytes allocated for large objects local_nlargealloc uintptr // number of large object allocations local_nsmallalloc [_NumSizeClasses]uintptr // number of allocs for small objects @@ -151,6 +151,8 @@ func (c *mcache) donate(d *mcache) { d.local_nsmallfree[i] += c.local_nsmallfree[i] c.local_nsmallfree[i] = 0 } + d.local_tinyallocs += c.local_tinyallocs + c.local_tinyallocs = 0 } // refill acquires a new span of span class spc for c. 
This span will diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 5635dc6784..47f86ee38c 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1163,8 +1163,6 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS } atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 - memstats.tinyallocs += uint64(c.local_tinyallocs) - c.local_tinyallocs = 0 // heap_scan was been updated. if gcBlackenEnabled != 0 { @@ -1358,8 +1356,6 @@ func (h *mheap) freeSpan(s *mspan) { lock(&h.lock) atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 - memstats.tinyallocs += uint64(c.local_tinyallocs) - c.local_tinyallocs = 0 if msanenabled { // Tell msan that this entire span is no longer in use. base := unsafe.Pointer(s.base()) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 44cf17c85b..341906fced 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -550,6 +550,7 @@ func updatememstats() { memstats.total_alloc = 0 memstats.nmalloc = 0 memstats.nfree = 0 + memstats.tinyallocs = 0 for i := 0; i < len(memstats.by_size); i++ { memstats.by_size[i].nmalloc = 0 memstats.by_size[i].nfree = 0 @@ -572,6 +573,9 @@ func updatememstats() { totalFree += uint64(c.local_largefree) memstats.nfree += uint64(c.local_nlargefree) + // Collect tiny allocation stats. + memstats.tinyallocs += uint64(c.local_tinyallocs) + // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { // Malloc stats. @@ -644,8 +648,6 @@ func purgecachedstats(c *mcache) { // Protected by heap lock. atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 - memstats.tinyallocs += uint64(c.local_tinyallocs) - c.local_tinyallocs = 0 } // Atomically increases a given *system* memory stat. We are counting on this -- cgit v1.3 From c8638498008f9874dc5a48734418e0fbea08cee9 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Fri, 24 Jul 2020 19:58:31 +0000 Subject: runtime: rename mcache fields to match Go style This change renames a bunch of malloc statistics stored in the mcache that are all named with the "local_" prefix. It also renames largeAlloc to allocLarge to prevent a naming conflict, and next_sample because it would be the last mcache field with the old C naming style. Change-Id: I29695cb83b397a435ede7e9ad5c3c9be72767ea3 Reviewed-on: https://go-review.googlesource.com/c/go/+/246969 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 14 ++++---- src/runtime/malloc.go | 12 +++---- src/runtime/mcache.go | 78 ++++++++++++++++++++--------------------- src/runtime/mgc.go | 8 ++--- src/runtime/mgcsweep.go | 6 ++-- src/runtime/mstats.go | 22 ++++++------ src/runtime/pprof/mprof_test.go | 2 +- 7 files changed, 71 insertions(+), 71 deletions(-) (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index d71b180f76..47cbc286f6 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -346,18 +346,18 @@ func ReadMemStatsSlow() (base, slow MemStats) { continue } // Collect large allocation stats. - largeFree += uint64(c.local_largefree) - slow.Frees += uint64(c.local_nlargefree) + largeFree += uint64(c.largeFree) + slow.Frees += uint64(c.largeFreeCount) // Collect tiny allocation stats. - tinyAllocs += uint64(c.local_tinyallocs) + tinyAllocs += uint64(c.tinyAllocCount) // Collect per-sizeclass stats. 
for i := 0; i < _NumSizeClasses; i++ { - slow.Frees += uint64(c.local_nsmallfree[i]) - bySize[i].Frees += uint64(c.local_nsmallfree[i]) - bySize[i].Mallocs += uint64(c.local_nsmallfree[i]) - smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + slow.Frees += uint64(c.smallFreeCount[i]) + bySize[i].Frees += uint64(c.smallFreeCount[i]) + bySize[i].Mallocs += uint64(c.smallFreeCount[i]) + smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) } } slow.Frees += tinyAllocs diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index ec601ccb39..0f48d7f68e 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1040,7 +1040,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { // The object fits into existing tiny block. x = unsafe.Pointer(c.tiny + off) c.tinyoffset = off + size - c.local_tinyallocs++ + c.tinyAllocCount++ mp.mallocing = 0 releasem(mp) return x @@ -1082,7 +1082,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } } else { shouldhelpgc = true - span = c.largeAlloc(size, needzero, noscan) + span = c.allocLarge(size, needzero, noscan) span.freeindex = 1 span.allocCount = 1 x = unsafe.Pointer(span.base()) @@ -1111,7 +1111,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } else { scanSize = typ.ptrdata } - c.local_scan += scanSize + c.scanAlloc += scanSize } // Ensure that the stores above that initialize x to @@ -1153,8 +1153,8 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } if rate := MemProfileRate; rate > 0 { - if rate != 1 && size < c.next_sample { - c.next_sample -= size + if rate != 1 && size < c.nextSample { + c.nextSample -= size } else { mp := acquirem() profilealloc(mp, x, size) @@ -1221,7 +1221,7 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { throw("profilealloc called with no P") } } - c.next_sample = nextSample() + c.nextSample = nextSample() mProf_Malloc(x, size) } diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index b8e388cc4f..c3e0e5e1f7 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -20,8 +20,8 @@ import ( type mcache struct { // The following members are accessed on every malloc, // so they are grouped here for better caching. - next_sample uintptr // trigger heap sample after allocating this many bytes - local_scan uintptr // bytes of scannable heap allocated + nextSample uintptr // trigger heap sample after allocating this many bytes + scanAlloc uintptr // bytes of scannable heap allocated // Allocator cache for tiny objects w/o pointers. // See "Tiny allocator" comment in malloc.go. @@ -48,13 +48,13 @@ type mcache struct { // When read with stats from other mcaches and with the world // stopped, the result will accurately reflect the state of the // application. 
- local_tinyallocs uintptr // number of tiny allocs not counted in other stats - local_largealloc uintptr // bytes allocated for large objects - local_nlargealloc uintptr // number of large object allocations - local_nsmallalloc [_NumSizeClasses]uintptr // number of allocs for small objects - local_largefree uintptr // bytes freed for large objects (>maxsmallsize) - local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) - local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) + tinyAllocCount uintptr // number of tiny allocs not counted in other stats + largeAlloc uintptr // bytes allocated for large objects + largeAllocCount uintptr // number of large object allocations + smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects + largeFree uintptr // bytes freed for large objects (>maxSmallSize) + largeFreeCount uintptr // number of frees for large objects (>maxSmallSize) + smallFreeCount [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize) // flushGen indicates the sweepgen during which this mcache // was last flushed. If flushGen != mheap_.sweepgen, the spans @@ -103,7 +103,7 @@ func allocmcache() *mcache { for i := range c.alloc { c.alloc[i] = &emptymspan } - c.next_sample = nextSample() + c.nextSample = nextSample() return c } @@ -134,26 +134,26 @@ func freemcache(c *mcache, recipient *mcache) { // donate flushes data and resources which have no global // pool to another mcache. func (c *mcache) donate(d *mcache) { - // local_scan is handled separately because it's not + // scanAlloc is handled separately because it's not // like these stats -- it's used for GC pacing. - d.local_largealloc += c.local_largealloc - c.local_largealloc = 0 - d.local_nlargealloc += c.local_nlargealloc - c.local_nlargealloc = 0 - for i := range c.local_nsmallalloc { - d.local_nsmallalloc[i] += c.local_nsmallalloc[i] - c.local_nsmallalloc[i] = 0 + d.largeAlloc += c.largeAlloc + c.largeAlloc = 0 + d.largeAllocCount += c.largeAllocCount + c.largeAllocCount = 0 + for i := range c.smallAllocCount { + d.smallAllocCount[i] += c.smallAllocCount[i] + c.smallAllocCount[i] = 0 } - d.local_largefree += c.local_largefree - c.local_largefree = 0 - d.local_nlargefree += c.local_nlargefree - c.local_nlargefree = 0 - for i := range c.local_nsmallfree { - d.local_nsmallfree[i] += c.local_nsmallfree[i] - c.local_nsmallfree[i] = 0 + d.largeFree += c.largeFree + c.largeFree = 0 + d.largeFreeCount += c.largeFreeCount + c.largeFreeCount = 0 + for i := range c.smallFreeCount { + d.smallFreeCount[i] += c.smallFreeCount[i] + c.smallFreeCount[i] = 0 } - d.local_tinyallocs += c.local_tinyallocs - c.local_tinyallocs = 0 + d.tinyAllocCount += c.tinyAllocCount + c.tinyAllocCount = 0 } // refill acquires a new span of span class spc for c. This span will @@ -192,16 +192,16 @@ func (c *mcache) refill(spc spanClass) { // Assume all objects from this span will be allocated in the // mcache. If it gets uncached, we'll adjust this. - c.local_nsmallalloc[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) + c.smallAllocCount[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) // Update heap_live with the same assumption. usedBytes := uintptr(s.allocCount) * s.elemsize atomic.Xadd64(&memstats.heap_live, int64(s.npages*pageSize)-int64(usedBytes)) - // While we're here, flush local_scan, since we have to call + // While we're here, flush scanAlloc, since we have to call // revise anyway. 
- atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 + atomic.Xadd64(&memstats.heap_scan, int64(c.scanAlloc)) + c.scanAlloc = 0 if trace.enabled { // heap_live changed. @@ -215,8 +215,8 @@ func (c *mcache) refill(spc spanClass) { c.alloc[spc] = s } -// largeAlloc allocates a span for a large object. -func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { +// allocLarge allocates a span for a large object. +func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) *mspan { if size+_PageSize < size { throw("out of memory") } @@ -235,8 +235,8 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { if s == nil { throw("out of memory") } - c.local_largealloc += npages * pageSize - c.local_nlargealloc++ + c.largeAlloc += npages * pageSize + c.largeAllocCount++ // Update heap_live and revise pacing if needed. atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize)) @@ -257,9 +257,9 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { } func (c *mcache) releaseAll() { - // Take this opportunity to flush local_scan. - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 + // Take this opportunity to flush scanAlloc. + atomic.Xadd64(&memstats.heap_scan, int64(c.scanAlloc)) + c.scanAlloc = 0 sg := mheap_.sweepgen for i := range c.alloc { @@ -267,7 +267,7 @@ func (c *mcache) releaseAll() { if s != &emptymspan { // Adjust nsmallalloc in case the span wasn't fully allocated. n := uintptr(s.nelems) - uintptr(s.allocCount) - c.local_nsmallalloc[spanClass(i).sizeclass()] -= n + c.smallAllocCount[spanClass(i).sizeclass()] -= n if s.sweepgen != sg+1 { // refill conservatively counted unallocated slots in heap_live. // Undo this. diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 55554c117c..540c376f1c 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -2086,16 +2086,16 @@ func gcMark(start_time int64) { // Update the marked heap stat. memstats.heap_marked = work.bytesMarked - // Flush local_scan from each mcache since we're about to modify - // heap_scan directly. If we were to flush this later, then local_scan + // Flush scanAlloc from each mcache since we're about to modify + // heap_scan directly. If we were to flush this later, then scanAlloc // might have incorrect information. for _, p := range allp { c := p.mcache if c == nil { continue } - memstats.heap_scan += uint64(c.local_scan) - c.local_scan = 0 + memstats.heap_scan += uint64(c.scanAlloc) + c.scanAlloc = 0 } // Update other GC heap size stats. This must happen after diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 6b8c56ce35..7103b08455 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -503,7 +503,7 @@ func (s *mspan) sweep(preserve bool) bool { // wasn't totally filled, but then swept, still has all of its // free slots zeroed. 
s.needzero = 1 - c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed) + c.smallFreeCount[spc.sizeclass()] += uintptr(nfreed) } if !preserve { // The caller may not have removed this span from whatever @@ -548,8 +548,8 @@ func (s *mspan) sweep(preserve bool) bool { } else { mheap_.freeSpan(s) } - c.local_nlargefree++ - c.local_largefree += size + c.largeFreeCount++ + c.largeFree += size return true } diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 5eeb173640..64687c24e5 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -565,25 +565,25 @@ func updatememstats() { continue } // Collect large allocation stats. - memstats.nmalloc += uint64(c.local_nlargealloc) - totalAlloc += uint64(c.local_largealloc) - totalFree += uint64(c.local_largefree) - memstats.nfree += uint64(c.local_nlargefree) + memstats.nmalloc += uint64(c.largeAllocCount) + totalAlloc += uint64(c.largeAlloc) + totalFree += uint64(c.largeFree) + memstats.nfree += uint64(c.largeFreeCount) // Collect tiny allocation stats. - memstats.tinyallocs += uint64(c.local_tinyallocs) + memstats.tinyallocs += uint64(c.tinyAllocCount) // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { // Malloc stats. - memstats.nmalloc += uint64(c.local_nsmallalloc[i]) - memstats.by_size[i].nmalloc += uint64(c.local_nsmallalloc[i]) - totalAlloc += uint64(c.local_nsmallalloc[i]) * uint64(class_to_size[i]) + memstats.nmalloc += uint64(c.smallAllocCount[i]) + memstats.by_size[i].nmalloc += uint64(c.smallAllocCount[i]) + totalAlloc += uint64(c.smallAllocCount[i]) * uint64(class_to_size[i]) // Free stats. - memstats.nfree += uint64(c.local_nsmallfree[i]) - memstats.by_size[i].nfree += uint64(c.local_nsmallfree[i]) - smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + memstats.nfree += uint64(c.smallFreeCount[i]) + memstats.by_size[i].nfree += uint64(c.smallFreeCount[i]) + smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) } } diff --git a/src/runtime/pprof/mprof_test.go b/src/runtime/pprof/mprof_test.go index f253f07def..c11a45fd69 100644 --- a/src/runtime/pprof/mprof_test.go +++ b/src/runtime/pprof/mprof_test.go @@ -70,7 +70,7 @@ func TestMemoryProfiler(t *testing.T) { runtime.MemProfileRate = oldRate }() - // Allocate a meg to ensure that mcache.next_sample is updated to 1. + // Allocate a meg to ensure that mcache.nextSample is updated to 1. for i := 0; i < 1024; i++ { memSink = make([]byte, 1024) } -- cgit v1.3 From 8ebc58452af3a586a3da1f68725bc83c78d4b073 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 29 Jul 2020 20:25:05 +0000 Subject: runtime: delineate which memstats are system stats with a type This change modifies the type of several mstats fields to be a new type: sysMemStat. This type has the same structure as the fields used to have. The purpose of this change is to make it very clear which stats may be used in various functions for accounting (usually the platform-specific sys* functions, but there are others). Currently there's an implicit understanding that the *uint64 value passed to these functions is some kind of statistic whose value is atomically managed. This understanding isn't inherently problematic, but we're about to change how some stats (which currently use mSysStatInc and mSysStatDec) work, so we want to make it very clear what the various requirements are around "sysStat". This change also removes mSysStatInc and mSysStatDec in favor of a method on sysMemStat. 
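In outline, the new type is a thin atomic wrapper. The sketch below
is consistent with the call sites in the hunks that follow
(sysStat.add(int64(n)), memstats.heap_sys.load()); the real
definition lives in mstats.go and may carry additional checks, such
as detecting underflow:

	// sysMemStat is a system memory stat that is always updated
	// and read atomically. In the runtime the atomics come from
	// runtime/internal/atomic.
	type sysMemStat uint64

	func (s *sysMemStat) load() uint64 {
		return atomic.Load64((*uint64)(s))
	}

	// add takes a signed delta, subsuming the old paired
	// mSysStatInc/mSysStatDec helpers.
	func (s *sysMemStat) add(n int64) {
		atomic.Xadd64((*uint64)(s), n)
	}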
Note that those two functions were originally written the way they were because atomic 64-bit adds required a valid G on ARM, but this hasn't been the case for a very long time (since golang.org/cl/14204, but even before then it wasn't clear if mutexes required a valid G anymore). Today we implement 64-bit adds on ARM with a spinlock table. Change-Id: I4e9b37cf14afc2ae20cf736e874eb0064af086d7 Reviewed-on: https://go-review.googlesource.com/c/go/+/246971 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 4 +-- src/runtime/heapdump.go | 14 ++++---- src/runtime/malloc.go | 10 +++--- src/runtime/mem_aix.go | 12 +++---- src/runtime/mem_bsd.go | 12 +++---- src/runtime/mem_darwin.go | 12 +++---- src/runtime/mem_js.go | 10 +++--- src/runtime/mem_linux.go | 12 +++---- src/runtime/mem_plan9.go | 12 +++---- src/runtime/mem_windows.go | 12 +++---- src/runtime/mfixalloc.go | 4 +-- src/runtime/mgcscavenge.go | 4 +-- src/runtime/mheap.go | 28 ++++++++-------- src/runtime/mpagealloc.go | 4 +-- src/runtime/mranges.go | 4 +-- src/runtime/mstats.go | 82 +++++++++++++++++----------------------------- src/runtime/os_darwin.go | 3 +- 17 files changed, 109 insertions(+), 130 deletions(-) (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 47cbc286f6..cb753ee819 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -820,7 +820,7 @@ type AddrRanges struct { // Add. func NewAddrRanges() AddrRanges { r := addrRanges{} - r.init(new(uint64)) + r.init(new(sysMemStat)) return AddrRanges{r, true} } @@ -844,7 +844,7 @@ func MakeAddrRanges(a ...AddrRange) AddrRanges { return AddrRanges{addrRanges{ ranges: ranges, totalBytes: total, - sysStat: new(uint64), + sysStat: new(sysMemStat), }, false} } diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 4c35309211..495ecc5164 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -548,20 +548,20 @@ func dumpmemstats() { dumpint(memstats.nmalloc) dumpint(memstats.nfree) dumpint(memstats.heap_alloc) - dumpint(memstats.heap_sys) + dumpint(memstats.heap_sys.load()) dumpint(memstats.heap_idle) dumpint(memstats.heap_inuse) dumpint(memstats.heap_released) dumpint(memstats.heap_objects) dumpint(memstats.stacks_inuse) - dumpint(memstats.stacks_sys) + dumpint(memstats.stacks_sys.load()) dumpint(memstats.mspan_inuse) - dumpint(memstats.mspan_sys) + dumpint(memstats.mspan_sys.load()) dumpint(memstats.mcache_inuse) - dumpint(memstats.mcache_sys) - dumpint(memstats.buckhash_sys) - dumpint(memstats.gc_sys) - dumpint(memstats.other_sys) + dumpint(memstats.mcache_sys.load()) + dumpint(memstats.buckhash_sys.load()) + dumpint(memstats.gc_sys.load()) + dumpint(memstats.other_sys.load()) dumpint(memstats.next_gc) dumpint(memstats.last_gc_unix) dumpint(memstats.pause_total_ns) diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 0f48d7f68e..27d678d917 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1313,7 +1313,7 @@ var persistentChunks *notInHeap // The returned memory will be zeroed. // // Consider marking persistentalloc'd types go:notinheap. 
-func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { +func persistentalloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer { var p *notInHeap systemstack(func() { p = persistentalloc1(size, align, sysStat) @@ -1324,7 +1324,7 @@ func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { // Must run on system stack because stack growth can (re)invoke it. // See issue 9174. //go:systemstack -func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { +func persistentalloc1(size, align uintptr, sysStat *sysMemStat) *notInHeap { const ( maxBlock = 64 << 10 // VM reservation granularity is 64K on windows ) @@ -1383,8 +1383,8 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { } if sysStat != &memstats.other_sys { - mSysStatInc(sysStat, size) - mSysStatDec(&memstats.other_sys, size) + sysStat.add(int64(size)) + memstats.other_sys.add(-int64(size)) } return p } @@ -1425,7 +1425,7 @@ func (l *linearAlloc) init(base, size uintptr) { l.end = base + size } -func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { +func (l *linearAlloc) alloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer { p := alignUp(l.next, align) if p+size > l.end { return nil diff --git a/src/runtime/mem_aix.go b/src/runtime/mem_aix.go index 7e145b072a..957aa4dcc2 100644 --- a/src/runtime/mem_aix.go +++ b/src/runtime/mem_aix.go @@ -11,7 +11,7 @@ import ( // Don't split the stack as this method may be invoked without a valid G, which // prevents us from allocating more stack. //go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) if err != 0 { if err == _EACCES { @@ -24,7 +24,7 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { } return nil } - mSysStatInc(sysStat, n) + sysStat.add(int64(n)) return p } @@ -41,8 +41,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) munmap(v, n) } @@ -59,8 +59,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { return p } -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) // AIX does not allow mapping a range that is already mapped. // So, call mprotect to change permissions. diff --git a/src/runtime/mem_bsd.go b/src/runtime/mem_bsd.go index 4d860e7bd3..bc672019fb 100644 --- a/src/runtime/mem_bsd.go +++ b/src/runtime/mem_bsd.go @@ -13,12 +13,12 @@ import ( // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. 
//go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { v, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) if err != 0 { return nil } - mSysStatInc(sysStat, n) + sysStat.add(int64(n)) return v } @@ -35,8 +35,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) munmap(v, n) } @@ -65,8 +65,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { const _sunosEAGAIN = 11 const _ENOMEM = 12 -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) if err == _ENOMEM || ((GOOS == "solaris" || GOOS == "illumos") && err == _sunosEAGAIN) { diff --git a/src/runtime/mem_darwin.go b/src/runtime/mem_darwin.go index 3b5d565b0f..7fccd2bb8e 100644 --- a/src/runtime/mem_darwin.go +++ b/src/runtime/mem_darwin.go @@ -11,12 +11,12 @@ import ( // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { v, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) if err != 0 { return nil } - mSysStatInc(sysStat, n) + sysStat.add(int64(n)) return v } @@ -39,8 +39,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) munmap(v, n) } @@ -58,8 +58,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { const _ENOMEM = 12 -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) if err == _ENOMEM { diff --git a/src/runtime/mem_js.go b/src/runtime/mem_js.go index 092b3d4fa2..957ed36ffa 100644 --- a/src/runtime/mem_js.go +++ b/src/runtime/mem_js.go @@ -13,7 +13,7 @@ import ( // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { p := sysReserve(nil, n) sysMap(p, n, sysStat) return p @@ -31,8 +31,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. 
//go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) } func sysFault(v unsafe.Pointer, n uintptr) { @@ -80,6 +80,6 @@ func growMemory(pages int32) int32 // This allows the front-end to replace the old DataView object with a new one. func resetMemoryDataView() -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) } diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go index 59b0bca970..3436851091 100644 --- a/src/runtime/mem_linux.go +++ b/src/runtime/mem_linux.go @@ -17,7 +17,7 @@ const ( // Don't split the stack as this method may be invoked without a valid G, which // prevents us from allocating more stack. //go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) if err != 0 { if err == _EACCES { @@ -30,7 +30,7 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { } return nil } - mSysStatInc(sysStat, n) + sysStat.add(int64(n)) return p } @@ -144,8 +144,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) munmap(v, n) } @@ -161,8 +161,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { return p } -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) if err == _ENOMEM { diff --git a/src/runtime/mem_plan9.go b/src/runtime/mem_plan9.go index 4fea851cdd..53d8e6dffa 100644 --- a/src/runtime/mem_plan9.go +++ b/src/runtime/mem_plan9.go @@ -140,19 +140,19 @@ func sbrk(n uintptr) unsafe.Pointer { return unsafe.Pointer(bl) } -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { lock(&memlock) p := memAlloc(n) memCheck() unlock(&memlock) if p != nil { - mSysStatInc(sysStat, n) + sysStat.add(int64(n)) } return p } -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) lock(&memlock) if uintptr(v)+n == bloc { // Address range being freed is at the end of memory, @@ -176,10 +176,10 @@ func sysUsed(v unsafe.Pointer, n uintptr) { func sysHugePage(v unsafe.Pointer, n uintptr) { } -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { // sysReserve has already allocated all heap memory, // but has not adjusted stats. 
- mSysStatInc(sysStat, n) + sysStat.add(int64(n)) } func sysFault(v unsafe.Pointer, n uintptr) { diff --git a/src/runtime/mem_windows.go b/src/runtime/mem_windows.go index 165062ec27..3a805b9767 100644 --- a/src/runtime/mem_windows.go +++ b/src/runtime/mem_windows.go @@ -24,8 +24,8 @@ const ( // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { - mSysStatInc(sysStat, n) +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { + sysStat.add(int64(n)) return unsafe.Pointer(stdcall4(_VirtualAlloc, 0, n, _MEM_COMMIT|_MEM_RESERVE, _PAGE_READWRITE)) } @@ -97,8 +97,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) r := stdcall3(_VirtualFree, uintptr(v), 0, _MEM_RELEASE) if r == 0 { print("runtime: VirtualFree of ", n, " bytes failed with errno=", getlasterror(), "\n") @@ -124,6 +124,6 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { return unsafe.Pointer(stdcall4(_VirtualAlloc, 0, n, _MEM_RESERVE, _PAGE_READWRITE)) } -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) } diff --git a/src/runtime/mfixalloc.go b/src/runtime/mfixalloc.go index f9dd6ca474..293c16b38b 100644 --- a/src/runtime/mfixalloc.go +++ b/src/runtime/mfixalloc.go @@ -32,7 +32,7 @@ type fixalloc struct { chunk uintptr // use uintptr instead of unsafe.Pointer to avoid write barriers nchunk uint32 inuse uintptr // in-use bytes now - stat *uint64 + stat *sysMemStat zero bool // zero allocations } @@ -49,7 +49,7 @@ type mlink struct { // Initialize f to allocate objects of the given size, // using the allocator to obtain chunks of memory. -func (f *fixalloc) init(size uintptr, first func(arg, p unsafe.Pointer), arg unsafe.Pointer, stat *uint64) { +func (f *fixalloc) init(size uintptr, first func(arg, p unsafe.Pointer), arg unsafe.Pointer, stat *sysMemStat) { f.size = size f.first = first f.arg = arg diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 6328b295ca..8b1a0be353 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -100,7 +100,7 @@ const ( // heapRetained returns an estimate of the current heap RSS. func heapRetained() uint64 { - return atomic.Load64(&memstats.heap_sys) - atomic.Load64(&memstats.heap_released) + return memstats.heap_sys.load() - atomic.Load64(&memstats.heap_released) } // gcPaceScavenger updates the scavenger's pacing, particularly @@ -711,7 +711,7 @@ func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr // Update global accounting only when not in test, otherwise // the runtime's accounting will be wrong. - mSysStatInc(&memstats.heap_released, uintptr(npages)*pageSize) + atomic.Xadd64(&memstats.heap_released, int64(npages)*pageSize) return addr } diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index df659e222b..27c1bfbcf1 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1222,22 +1222,22 @@ HaveSpan: // sysUsed all the pages that are actually available // in the span since some of them might be scavenged. 
sysUsed(unsafe.Pointer(base), nbytes) - mSysStatDec(&memstats.heap_released, scav) + atomic.Xadd64(&memstats.heap_released, -int64(scav)) } // Update stats. switch typ { case spanAllocHeap: - mSysStatInc(&memstats.heap_inuse, nbytes) + atomic.Xadd64(&memstats.heap_inuse, int64(nbytes)) case spanAllocStack: - mSysStatInc(&memstats.stacks_inuse, nbytes) + atomic.Xadd64(&memstats.stacks_inuse, int64(nbytes)) case spanAllocPtrScalarBits, spanAllocWorkBuf: - mSysStatInc(&memstats.gc_sys, nbytes) + memstats.gc_sys.add(int64(nbytes)) } if typ.manual() { // Manually managed memory doesn't count toward heap_sys. - mSysStatDec(&memstats.heap_sys, nbytes) + memstats.heap_sys.add(-int64(nbytes)) } - mSysStatDec(&memstats.heap_idle, nbytes) + atomic.Xadd64(&memstats.heap_idle, -int64(nbytes)) // Publish the span in various locations. @@ -1314,8 +1314,8 @@ func (h *mheap) grow(npage uintptr) bool { // The allocation is always aligned to the heap arena // size which is always > physPageSize, so its safe to // just add directly to heap_released. - mSysStatInc(&memstats.heap_released, asize) - mSysStatInc(&memstats.heap_idle, asize) + atomic.Xadd64(&memstats.heap_released, int64(asize)) + atomic.Xadd64(&memstats.heap_idle, int64(asize)) // Recalculate nBase. // We know this won't overflow, because sysAlloc returned @@ -1400,18 +1400,20 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { // Update stats. // // Mirrors the code in allocSpan. + nbytes := s.npages * pageSize switch typ { case spanAllocHeap: - mSysStatDec(&memstats.heap_inuse, s.npages*pageSize) + atomic.Xadd64(&memstats.heap_inuse, -int64(nbytes)) case spanAllocStack: - mSysStatDec(&memstats.stacks_inuse, s.npages*pageSize) + atomic.Xadd64(&memstats.stacks_inuse, -int64(nbytes)) case spanAllocPtrScalarBits, spanAllocWorkBuf: - mSysStatDec(&memstats.gc_sys, s.npages*pageSize) + memstats.gc_sys.add(-int64(nbytes)) } if typ.manual() { - mSysStatInc(&memstats.heap_sys, s.npages*pageSize) + // Manually managed memory doesn't count toward heap_sys, so add it back. + memstats.heap_sys.add(int64(nbytes)) } - mSysStatInc(&memstats.heap_idle, s.npages*pageSize) + atomic.Xadd64(&memstats.heap_idle, int64(nbytes)) // Mark the space as free. h.pages.free(s.base(), s.npages) diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index 560babed03..2af1c97e0b 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -293,13 +293,13 @@ type pageAlloc struct { // sysStat is the runtime memstat to update when new system // memory is committed by the pageAlloc for allocation metadata. - sysStat *uint64 + sysStat *sysMemStat // Whether or not this struct is being used in tests. test bool } -func (p *pageAlloc) init(mheapLock *mutex, sysStat *uint64) { +func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat) { if levelLogPages[0] > logMaxPackedValue { // We can't represent 1< 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) { + print("runtime: val=", val, " n=", n, "\n") + throw("sysMemStat overflow") } } diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go index 394bd6fb0f..3f5bb7cf96 100644 --- a/src/runtime/os_darwin.go +++ b/src/runtime/os_darwin.go @@ -198,7 +198,6 @@ func newosproc(mp *m) { exit(1) } mp.g0.stack.hi = stacksize // for mstart - //mSysStatInc(&memstats.stacks_sys, stacksize) //TODO: do this? // Tell the pthread library we won't join with this thread. 
if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 { @@ -247,7 +246,7 @@ func newosproc0(stacksize uintptr, fn uintptr) { exit(1) } g0.stack.hi = stacksize // for mstart
- mSysStatInc(&memstats.stacks_sys, stacksize)
+ memstats.stacks_sys.add(int64(stacksize))
 // Tell the pthread library we won't join with this thread. if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 { -- cgit v1.3

From 79781e8dd382ac34e502ed6a088dff6860a08c05 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 4 Aug 2020 17:29:03 +0000 Subject: runtime: move malloc stats into consistentHeapStats

This change moves the mcache-local malloc stats into the consistentHeapStats structure so the malloc stats can be managed consistently with the memory stats. The one exception here is tinyAllocs: moving that into the global stats would incur several atomic writes on the fast path. Microbenchmarks for just one CPU core have shown a 50% loss in throughput. Since the tiny allocation count isn't exposed anyway and is always blindly added to both allocs and frees, let that stay inconsistent and flush the tiny allocation count every so often.

Change-Id: I2a4b75f209c0e659b9c0db081a3287bf227c10ca Reviewed-on: https://go-review.googlesource.com/c/go/+/247039 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 37 ++++++++-------------- src/runtime/malloc.go | 2 +- src/runtime/mcache.go | 70 ++++++++++++++--------------------------- src/runtime/mgcsweep.go | 10 ++++-- src/runtime/mstats.go | 78 ++++++++++++++++++++++++++-------------------- src/runtime/proc.go | 2 +- 6 files changed, 90 insertions(+), 109 deletions(-) (limited to 'src/runtime/export_test.go')

diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index cb753ee819..ff901fd7be 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -337,33 +337,22 @@ func ReadMemStatsSlow() (base, slow MemStats) { } }
- // Add in frees. readmemstats_m flushed the cached stats, so
- // these are up-to-date.
- var tinyAllocs, largeFree, smallFree uint64
- for _, p := range allp {
- c := p.mcache
- if c == nil {
- continue
- }
- // Collect large allocation stats.
- largeFree += uint64(c.largeFree)
- slow.Frees += uint64(c.largeFreeCount)
-
- // Collect tiny allocation stats.
- tinyAllocs += uint64(c.tinyAllocCount)
-
- // Collect per-sizeclass stats.
- for i := 0; i < _NumSizeClasses; i++ {
- slow.Frees += uint64(c.smallFreeCount[i])
- bySize[i].Frees += uint64(c.smallFreeCount[i])
- bySize[i].Mallocs += uint64(c.smallFreeCount[i])
- smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i])
- }
+ // Add in frees by just reading the stats for those directly.
+ var m heapStatsDelta
+ memstats.heapStats.unsafeRead(&m)
+
+ // Collect per-sizeclass free stats.
+ var smallFree uint64 + for i := 0; i < _NumSizeClasses; i++ { + slow.Frees += uint64(m.smallFreeCount[i]) + bySize[i].Frees += uint64(m.smallFreeCount[i]) + bySize[i].Mallocs += uint64(m.smallFreeCount[i]) + smallFree += uint64(m.smallFreeCount[i]) * uint64(class_to_size[i]) } - slow.Frees += tinyAllocs + slow.Frees += memstats.tinyallocs + uint64(m.largeFreeCount) slow.Mallocs += slow.Frees - slow.TotalAlloc = slow.Alloc + largeFree + smallFree + slow.TotalAlloc = slow.Alloc + uint64(m.largeFree) + smallFree for i := range slow.BySize { slow.BySize[i].Mallocs = bySize[i].Mallocs diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 6383c34817..d0b8c668c3 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1028,7 +1028,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { // The object fits into existing tiny block. x = unsafe.Pointer(c.tiny + off) c.tinyoffset = off + size - c.tinyAllocCount++ + c.tinyAllocs++ mp.mallocing = 0 releasem(mp) return x diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index e27a1c9ec0..c9342a41c9 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -32,8 +32,12 @@ type mcache struct { // tiny is a heap pointer. Since mcache is in non-GC'd memory, // we handle it by clearing it in releaseAll during mark // termination. + // + // tinyAllocs is the number of tiny allocations performed + // by the P that owns this mcache. tiny uintptr tinyoffset uintptr + tinyAllocs uintptr // The rest is not accessed on every malloc. @@ -41,21 +45,6 @@ type mcache struct { stackcache [_NumStackOrders]stackfreelist - // Allocator stats (source-of-truth). - // Only the P that owns this mcache may write to these - // variables, so it's safe for that P to read non-atomically. - // - // When read with stats from other mcaches and with the world - // stopped, the result will accurately reflect the state of the - // application. - tinyAllocCount uintptr // number of tiny allocs not counted in other stats - largeAlloc uintptr // bytes allocated for large objects - largeAllocCount uintptr // number of large object allocations - smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects - largeFree uintptr // bytes freed for large objects (>maxSmallSize) - largeFreeCount uintptr // number of frees for large objects (>maxSmallSize) - smallFreeCount [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize) - // flushGen indicates the sweepgen during which this mcache // was last flushed. If flushGen != mheap_.sweepgen, the spans // in this mcache are stale and need to the flushed so they @@ -117,7 +106,7 @@ func allocmcache() *mcache { // In some cases there is no way to simply release // resources, such as statistics, so donate them to // a different mcache (the recipient). -func freemcache(c *mcache, recipient *mcache) { +func freemcache(c *mcache) { systemstack(func() { c.releaseAll() stackcache_clear(c) @@ -128,8 +117,6 @@ func freemcache(c *mcache, recipient *mcache) { // gcworkbuffree(c.gcworkbuf) lock(&mheap_.lock) - // Donate anything else that's left. - c.donate(recipient) mheap_.cachealloc.free(unsafe.Pointer(c)) unlock(&mheap_.lock) }) @@ -158,31 +145,6 @@ func getMCache() *mcache { return c } -// donate flushes data and resources which have no global -// pool to another mcache. -func (c *mcache) donate(d *mcache) { - // scanAlloc is handled separately because it's not - // like these stats -- it's used for GC pacing. 
- d.largeAlloc += c.largeAlloc - c.largeAlloc = 0 - d.largeAllocCount += c.largeAllocCount - c.largeAllocCount = 0 - for i := range c.smallAllocCount { - d.smallAllocCount[i] += c.smallAllocCount[i] - c.smallAllocCount[i] = 0 - } - d.largeFree += c.largeFree - c.largeFree = 0 - d.largeFreeCount += c.largeFreeCount - c.largeFreeCount = 0 - for i := range c.smallFreeCount { - d.smallFreeCount[i] += c.smallFreeCount[i] - c.smallFreeCount[i] = 0 - } - d.tinyAllocCount += c.tinyAllocCount - c.tinyAllocCount = 0 -} - // refill acquires a new span of span class spc for c. This span will // have at least one free object. The current span in c must be full. // @@ -219,12 +181,20 @@ func (c *mcache) refill(spc spanClass) { // Assume all objects from this span will be allocated in the // mcache. If it gets uncached, we'll adjust this. - c.smallAllocCount[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) + stats := memstats.heapStats.acquire(c) + atomic.Xadduintptr(&stats.smallAllocCount[spc.sizeclass()], uintptr(s.nelems)-uintptr(s.allocCount)) + memstats.heapStats.release(c) // Update heap_live with the same assumption. usedBytes := uintptr(s.allocCount) * s.elemsize atomic.Xadd64(&memstats.heap_live, int64(s.npages*pageSize)-int64(usedBytes)) + // Flush tinyAllocs. + if spc == tinySpanClass { + atomic.Xadd64(&memstats.tinyallocs, int64(c.tinyAllocs)) + c.tinyAllocs = 0 + } + // While we're here, flush scanAlloc, since we have to call // revise anyway. atomic.Xadd64(&memstats.heap_scan, int64(c.scanAlloc)) @@ -262,8 +232,10 @@ func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) *mspan { if s == nil { throw("out of memory") } - c.largeAlloc += npages * pageSize - c.largeAllocCount++ + stats := memstats.heapStats.acquire(c) + atomic.Xadduintptr(&stats.largeAlloc, npages*pageSize) + atomic.Xadduintptr(&stats.largeAllocCount, 1) + memstats.heapStats.release(c) // Update heap_live and revise pacing if needed. atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize)) @@ -294,7 +266,9 @@ func (c *mcache) releaseAll() { if s != &emptymspan { // Adjust nsmallalloc in case the span wasn't fully allocated. n := uintptr(s.nelems) - uintptr(s.allocCount) - c.smallAllocCount[spanClass(i).sizeclass()] -= n + stats := memstats.heapStats.acquire(c) + atomic.Xadduintptr(&stats.smallAllocCount[spanClass(i).sizeclass()], -n) + memstats.heapStats.release(c) if s.sweepgen != sg+1 { // refill conservatively counted unallocated slots in heap_live. // Undo this. @@ -313,6 +287,8 @@ func (c *mcache) releaseAll() { // Clear tinyalloc pool. c.tiny = 0 c.tinyoffset = 0 + atomic.Xadd64(&memstats.tinyallocs, int64(c.tinyAllocs)) + c.tinyAllocs = 0 // Updated heap_scan and possible heap_live. if gcBlackenEnabled != 0 { diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 7103b08455..9b77ce635c 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -503,7 +503,9 @@ func (s *mspan) sweep(preserve bool) bool { // wasn't totally filled, but then swept, still has all of its // free slots zeroed. 
s.needzero = 1 - c.smallFreeCount[spc.sizeclass()] += uintptr(nfreed) + stats := memstats.heapStats.acquire(c) + atomic.Xadduintptr(&stats.smallFreeCount[spc.sizeclass()], uintptr(nfreed)) + memstats.heapStats.release(c) } if !preserve { // The caller may not have removed this span from whatever @@ -548,8 +550,10 @@ func (s *mspan) sweep(preserve bool) bool { } else { mheap_.freeSpan(s) } - c.largeFreeCount++ - c.largeFree += size + stats := memstats.heapStats.acquire(c) + atomic.Xadduintptr(&stats.largeFreeCount, 1) + atomic.Xadduintptr(&stats.largeFree, size) + memstats.heapStats.release(c) return true } diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 4363eff1e0..a8eca85fe6 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -612,48 +612,36 @@ func updatememstats() { memstats.total_alloc = 0 memstats.nmalloc = 0 memstats.nfree = 0 - memstats.tinyallocs = 0 for i := 0; i < len(memstats.by_size); i++ { memstats.by_size[i].nmalloc = 0 memstats.by_size[i].nfree = 0 } - - // Collect allocation stats. This is safe and consistent - // because the world is stopped. - var smallFree, totalAlloc, totalFree uint64 - for _, p := range allp { - c := p.mcache - if c == nil { - continue - } - // Collect large allocation stats. - memstats.nmalloc += uint64(c.largeAllocCount) - totalAlloc += uint64(c.largeAlloc) - totalFree += uint64(c.largeFree) - memstats.nfree += uint64(c.largeFreeCount) - - // Collect tiny allocation stats. - memstats.tinyallocs += uint64(c.tinyAllocCount) - - // Collect per-sizeclass stats. - for i := 0; i < _NumSizeClasses; i++ { - // Malloc stats. - memstats.nmalloc += uint64(c.smallAllocCount[i]) - memstats.by_size[i].nmalloc += uint64(c.smallAllocCount[i]) - totalAlloc += uint64(c.smallAllocCount[i]) * uint64(class_to_size[i]) - - // Free stats. - memstats.nfree += uint64(c.smallFreeCount[i]) - memstats.by_size[i].nfree += uint64(c.smallFreeCount[i]) - smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) - } - } // Collect consistent stats, which are the source-of-truth in the some cases. var consStats heapStatsDelta memstats.heapStats.unsafeRead(&consStats) - totalFree += smallFree + // Collect large allocation stats. + totalAlloc := uint64(consStats.largeAlloc) + memstats.nmalloc += uint64(consStats.largeAllocCount) + totalFree := uint64(consStats.largeFree) + memstats.nfree += uint64(consStats.largeFreeCount) + + // Collect per-sizeclass stats. + for i := 0; i < _NumSizeClasses; i++ { + // Malloc stats. + a := uint64(consStats.smallAllocCount[i]) + totalAlloc += a * uint64(class_to_size[i]) + memstats.nmalloc += a + memstats.by_size[i].nmalloc = a + + // Free stats. + f := uint64(consStats.smallFreeCount[i]) + totalFree += f * uint64(class_to_size[i]) + memstats.nfree += f + memstats.by_size[i].nfree = f + } + // Account for tiny allocations. memstats.nfree += memstats.tinyallocs memstats.nmalloc += memstats.tinyallocs @@ -752,12 +740,25 @@ func (s *sysMemStat) add(n int64) { // that need to be updated together in order for them to be kept // consistent with one another. type heapStatsDelta struct { + // Memory stats. committed int64 // byte delta of memory committed released int64 // byte delta of released memory generated inHeap int64 // byte delta of memory placed in the heap inStacks int64 // byte delta of memory reserved for stacks inWorkBufs int64 // byte delta of memory reserved for work bufs inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits + + // Allocator stats. 
+ largeAlloc uintptr // bytes allocated for large objects + largeAllocCount uintptr // number of large object allocations + smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects + largeFree uintptr // bytes freed for large objects (>maxSmallSize) + largeFreeCount uintptr // number of frees for large objects (>maxSmallSize) + smallFreeCount [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize) + + // Add a uint32 to ensure this struct is a multiple of 8 bytes in size. + // Only necessary on 32-bit platforms. + // _ [(sys.PtrSize / 4) % 2]uint32 } // merge adds in the deltas from b into a. @@ -768,6 +769,17 @@ func (a *heapStatsDelta) merge(b *heapStatsDelta) { a.inStacks += b.inStacks a.inWorkBufs += b.inWorkBufs a.inPtrScalarBits += b.inPtrScalarBits + + a.largeAlloc += b.largeAlloc + a.largeAllocCount += b.largeAllocCount + for i := range b.smallAllocCount { + a.smallAllocCount[i] += b.smallAllocCount[i] + } + a.largeFree += b.largeFree + a.largeFreeCount += b.largeFreeCount + for i := range b.smallFreeCount { + a.smallFreeCount[i] += b.smallFreeCount[i] + } } // consistentHeapStats represents a set of various memory statistics diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 4f4cff38aa..ebecc92745 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -4550,7 +4550,7 @@ func (pp *p) destroy() { pp.mspancache.len = 0 pp.pcache.flush(&mheap_.pages) }) - freemcache(pp.mcache, allp[0].mcache) + freemcache(pp.mcache) pp.mcache = nil gfpurge(pp) traceProcFree(pp) -- cgit v1.3 From b08dfbaa439e4e396b979e02ea2e7d36972e8b7a Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 1 Jul 2020 16:02:42 +0000 Subject: runtime,runtime/metrics: add memory metrics This change adds support for a variety of runtime memory metrics and contains the base implementation of Read for the runtime/metrics package, which lives in the runtime. It also adds testing infrastructure for the metrics package, and a bunch of format and documentation tests. For #37112. 
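To make the new surface concrete, here is a small consumer-side sketch built only from names visible in the diffs below (metrics.All, metrics.Sample, metrics.Read, Value.Kind, Value.Uint64). The program itself is illustrative and not part of the change:

    package main

    import (
            "fmt"
            "runtime/metrics"
    )

    func main() {
            // Build one Sample per supported metric.
            descs := metrics.All()
            samples := make([]metrics.Sample, len(descs))
            for i := range descs {
                    samples[i].Name = descs[i].Name
            }

            // Read populates each Sample's Value in place.
            metrics.Read(samples)

            for _, s := range samples {
                    // Every metric added by this change is a uint64 byte count.
                    if s.Value.Kind() == metrics.KindUint64 {
                            fmt.Printf("%s = %d\n", s.Name, s.Value.Uint64())
                    }
            }
    }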
Change-Id: I16a2c4781eeeb2de0abcb045c15105f1210e2d8a Reviewed-on: https://go-review.googlesource.com/c/go/+/247041 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt Trust: Michael Knyszek --- src/cmd/go/internal/work/gc.go | 6 +- src/runtime/export_test.go | 26 +++ src/runtime/metrics.go | 367 ++++++++++++++++++++++++++++++++ src/runtime/metrics/description.go | 80 ++++++- src/runtime/metrics/description_test.go | 125 +++++++++++ src/runtime/metrics/doc.go | 56 ++++- src/runtime/metrics/sample.go | 10 +- src/runtime/metrics_test.go | 114 ++++++++++ src/runtime/mstats.go | 3 +- 9 files changed, 781 insertions(+), 6 deletions(-) create mode 100644 src/runtime/metrics.go create mode 100644 src/runtime/metrics/description_test.go create mode 100644 src/runtime/metrics_test.go (limited to 'src/runtime/export_test.go') diff --git a/src/cmd/go/internal/work/gc.go b/src/cmd/go/internal/work/gc.go index e93031431c..0c4a7fa6e3 100644 --- a/src/cmd/go/internal/work/gc.go +++ b/src/cmd/go/internal/work/gc.go @@ -89,7 +89,11 @@ func (gcToolchain) gc(b *Builder, a *Action, archive string, importcfg []byte, s extFiles := len(p.CgoFiles) + len(p.CFiles) + len(p.CXXFiles) + len(p.MFiles) + len(p.FFiles) + len(p.SFiles) + len(p.SysoFiles) + len(p.SwigFiles) + len(p.SwigCXXFiles) if p.Standard { switch p.ImportPath { - case "bytes", "internal/poll", "net", "os", "runtime/pprof", "runtime/trace", "sync", "syscall", "time": + case "bytes", "internal/poll", "net", "os": + fallthrough + case "runtime/metrics", "runtime/pprof", "runtime/trace": + fallthrough + case "sync", "syscall", "time": extFiles++ } } diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index ff901fd7be..d043fe3ee5 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -298,6 +298,32 @@ func (p *ProfBuf) Close() { (*profBuf)(p).close() } +func ReadMetricsSlow(memStats *MemStats, samplesp unsafe.Pointer, len, cap int) { + stopTheWorld("ReadMetricsSlow") + + // Initialize the metrics beforehand because this could + // allocate and skew the stats. + semacquire(&metricsSema) + initMetrics() + semrelease(&metricsSema) + + systemstack(func() { + // Read memstats first. It's going to flush + // the mcaches which readMetrics does not do, so + // going the other way around may result in + // inconsistent statistics. + readmemstats_m(memStats) + }) + + // Read metrics off the system stack. + // + // The only part of readMetrics that could allocate + // and skew the stats is initMetrics. + readMetrics(samplesp, len, cap) + + startTheWorld() +} + // ReadMemStatsSlow returns both the runtime-computed MemStats and // MemStats accumulated by scanning the heap. func ReadMemStatsSlow() (base, slow MemStats) { diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go new file mode 100644 index 0000000000..44b5a29751 --- /dev/null +++ b/src/runtime/metrics.go @@ -0,0 +1,367 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +// Metrics implementation exported to runtime/metrics. + +import ( + "unsafe" +) + +var ( + // metrics is a map of runtime/metrics keys to + // data used by the runtime to sample each metric's + // value. + metricsSema uint32 = 1 + metricsInit bool + metrics map[string]metricData +) + +type metricData struct { + // deps is the set of runtime statistics that this metric + // depends on. 
Before compute is called, the statAggregate + // which will be passed must ensure() these dependencies. + deps statDepSet + + // compute is a function that populates a metricValue + // given a populated statAggregate structure. + compute func(in *statAggregate, out *metricValue) +} + +// initMetrics initializes the metrics map if it hasn't been yet. +// +// metricsSema must be held. +func initMetrics() { + if metricsInit { + return + } + metrics = map[string]metricData{ + "/memory/classes/heap/free:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.committed - in.heapStats.inHeap - + in.heapStats.inStacks - in.heapStats.inWorkBufs - + in.heapStats.inPtrScalarBits) + }, + }, + "/memory/classes/heap/objects:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.heapStats.inObjects + }, + }, + "/memory/classes/heap/released:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.released) + }, + }, + "/memory/classes/heap/stacks:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.inStacks) + }, + }, + "/memory/classes/heap/unused:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.inHeap) - in.heapStats.inObjects + }, + }, + "/memory/classes/metadata/mcache/free:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mCacheSys - in.sysStats.mCacheInUse + }, + }, + "/memory/classes/metadata/mcache/inuse:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mCacheInUse + }, + }, + "/memory/classes/metadata/mspan/free:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mSpanSys - in.sysStats.mSpanInUse + }, + }, + "/memory/classes/metadata/mspan/inuse:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mSpanInUse + }, + }, + "/memory/classes/metadata/other:bytes": { + deps: makeStatDepSet(heapStatsDep, sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.inWorkBufs+in.heapStats.inPtrScalarBits) + in.sysStats.gcMiscSys + }, + }, + "/memory/classes/os-stacks:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.stacksSys + }, + }, + "/memory/classes/other:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.otherSys + }, + }, + "/memory/classes/profiling/buckets:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.buckHashSys + }, + }, + 
"/memory/classes/total:bytes": { + deps: makeStatDepSet(heapStatsDep, sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.committed+in.heapStats.released) + + in.sysStats.stacksSys + in.sysStats.mSpanSys + + in.sysStats.mCacheSys + in.sysStats.buckHashSys + + in.sysStats.gcMiscSys + in.sysStats.otherSys + }, + }, + } + metricsInit = true +} + +// statDep is a dependency on a group of statistics +// that a metric might have. +type statDep uint + +const ( + heapStatsDep statDep = iota // corresponds to heapStatsAggregate + sysStatsDep // corresponds to sysStatsAggregate + numStatsDeps +) + +// statDepSet represents a set of statDeps. +// +// Under the hood, it's a bitmap. +type statDepSet [1]uint64 + +// makeStatDepSet creates a new statDepSet from a list of statDeps. +func makeStatDepSet(deps ...statDep) statDepSet { + var s statDepSet + for _, d := range deps { + s[d/64] |= 1 << (d % 64) + } + return s +} + +// differennce returns set difference of s from b as a new set. +func (s statDepSet) difference(b statDepSet) statDepSet { + var c statDepSet + for i := range s { + c[i] = s[i] &^ b[i] + } + return c +} + +// union returns the union of the two sets as a new set. +func (s statDepSet) union(b statDepSet) statDepSet { + var c statDepSet + for i := range s { + c[i] = s[i] | b[i] + } + return c +} + +// empty returns true if there are no dependencies in the set. +func (s *statDepSet) empty() bool { + for _, c := range s { + if c != 0 { + return false + } + } + return true +} + +// has returns true if the set contains a given statDep. +func (s *statDepSet) has(d statDep) bool { + return s[d/64]&(1<<(d%64)) != 0 +} + +// heapStatsAggregate represents memory stats obtained from the +// runtime. This set of stats is grouped together because they +// depend on each other in some way to make sense of the runtime's +// current heap memory use. They're also sharded across Ps, so it +// makes sense to grab them all at once. +type heapStatsAggregate struct { + heapStatsDelta + + // inObjects is the bytes of memory occupied by objects, + // derived from other values in heapStats. + inObjects uint64 +} + +// compute populates the heapStatsAggregate with values from the runtime. +func (a *heapStatsAggregate) compute() { + memstats.heapStats.read(&a.heapStatsDelta) + + // Calculate derived stats. + a.inObjects = uint64(a.largeAlloc - a.largeFree) + for i := range a.smallAllocCount { + a.inObjects += uint64(a.smallAllocCount[i]-a.smallFreeCount[i]) * uint64(class_to_size[i]) + } +} + +// sysStatsAggregate represents system memory stats obtained +// from the runtime. This set of stats is grouped together because +// they're all relatively cheap to acquire and generally independent +// of one another and other runtime memory stats. The fact that they +// may be acquired at different times, especially with respect to +// heapStatsAggregate, means there could be some skew, but because of +// these stats are independent, there's no real consistency issue here. +type sysStatsAggregate struct { + stacksSys uint64 + mSpanSys uint64 + mSpanInUse uint64 + mCacheSys uint64 + mCacheInUse uint64 + buckHashSys uint64 + gcMiscSys uint64 + otherSys uint64 +} + +// compute populates the sysStatsAggregate with values from the runtime. 
+func (a *sysStatsAggregate) compute() { + a.stacksSys = memstats.stacks_sys.load() + a.buckHashSys = memstats.buckhash_sys.load() + a.gcMiscSys = memstats.gcMiscSys.load() + a.otherSys = memstats.other_sys.load() + + systemstack(func() { + lock(&mheap_.lock) + a.mSpanSys = memstats.mspan_sys.load() + a.mSpanInUse = uint64(mheap_.spanalloc.inuse) + a.mCacheSys = memstats.mcache_sys.load() + a.mCacheInUse = uint64(mheap_.cachealloc.inuse) + unlock(&mheap_.lock) + }) +} + +// statAggregate is the main driver of the metrics implementation. +// +// It contains multiple aggregates of runtime statistics, as well +// as a set of these aggregates that it has populated. The aggergates +// are populated lazily by its ensure method. +type statAggregate struct { + ensured statDepSet + heapStats heapStatsAggregate + sysStats sysStatsAggregate +} + +// ensure populates statistics aggregates determined by deps if they +// haven't yet been populated. +func (a *statAggregate) ensure(deps *statDepSet) { + missing := deps.difference(a.ensured) + if missing.empty() { + return + } + for i := statDep(0); i < numStatsDeps; i++ { + if !missing.has(i) { + continue + } + switch i { + case heapStatsDep: + a.heapStats.compute() + case sysStatsDep: + a.sysStats.compute() + } + } + a.ensured = a.ensured.union(missing) +} + +// metricValidKind is a runtime copy of runtime/metrics.ValueKind and +// must be kept structurally identical to that type. +type metricKind int + +const ( + // These values must be kept identical to their corresponding Kind* values + // in the runtime/metrics package. + metricKindBad metricKind = iota + metricKindUint64 + metricKindFloat64 + metricKindFloat64Histogram +) + +// metricSample is a runtime copy of runtime/metrics.Sample and +// must be kept structurally identical to that type. +type metricSample struct { + name string + value metricValue +} + +// metricValue is a runtime copy of runtime/metrics.Sample and +// must be kept structurally identical to that type. +type metricValue struct { + kind metricKind + scalar uint64 // contains scalar values for scalar Kinds. + pointer unsafe.Pointer // contains non-scalar values. +} + +// agg is used by readMetrics, and is protected by metricsSema. +// +// Managed as a global variable because its pointer will be +// an argument to a dynamically-defined function, and we'd +// like to avoid it escaping to the heap. +var agg statAggregate + +// readMetrics is the implementation of runtime/metrics.Read. +// +//go:linkname readMetrics runtime/metrics.runtime_readMetrics +func readMetrics(samplesp unsafe.Pointer, len int, cap int) { + // Construct a slice from the args. + sl := slice{samplesp, len, cap} + samples := *(*[]metricSample)(unsafe.Pointer(&sl)) + + // Acquire the metricsSema but with handoff. This operation + // is expensive enough that queueing up goroutines and handing + // off between them will be noticably better-behaved. + semacquire1(&metricsSema, true, 0, 0) + + // Ensure the map is initialized. + initMetrics() + + // Clear agg defensively. + agg = statAggregate{} + + // Sample. + for i := range samples { + sample := &samples[i] + data, ok := metrics[sample.name] + if !ok { + sample.value.kind = metricKindBad + continue + } + // Ensure we have all the stats we need. + // agg is populated lazily. + agg.ensure(&data.deps) + + // Compute the value based on the stats we have. 
+ data.compute(&agg, &sample.value) + } + + semrelease(&metricsSema) +} diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index 32bb950a72..2e7df7e09f 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -10,7 +10,7 @@ type Description struct { // // The format of the metric may be described by the following regular expression. // - // ^(?P/[^:]+):(?P[^:*\/]+(?:[*\/][^:*\/]+)*)$ + // ^(?P/[^:]+):(?P[^:*/]+(?:[*/][^:*/]+)*)$ // // The format splits the name into two components, separated by a colon: a path which always // starts with a /, and a machine-parseable unit. The name may contain any valid Unicode @@ -26,6 +26,9 @@ type Description struct { // A complete name might look like "/memory/heap/free:bytes". Name string + // Description is an English language sentence describing the metric. + Description string + // Kind is the kind of value for this metric. // // The purpose of this field is to allow users to filter out metrics whose values are @@ -44,7 +47,80 @@ type Description struct { StopTheWorld bool } -var allDesc = []Description{} +// The English language descriptions below must be kept in sync with the +// descriptions of each metric in doc.go. +var allDesc = []Description{ + { + Name: "/memory/classes/heap/free:bytes", + Description: "Memory that is available for allocation, and may be returned to the underlying system.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/objects:bytes", + Description: "Memory occupied by live objects and dead objects that have not yet been collected.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/released:bytes", + Description: "Memory that has been returned to the underlying system.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/stacks:bytes", + Description: "Memory allocated from the heap that is occupied by stacks.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/unused:bytes", + Description: "Memory that is unavailable for allocation, but cannot be returned to the underlying system.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mcache/free:bytes", + Description: "Memory that is reserved for runtime mcache structures, but not in-use.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mcache/inuse:bytes", + Description: "Memory that is occupied by runtime mcache structures that are currently being used.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mspan/free:bytes", + Description: "Memory that is reserved for runtime mspan structures, but not in-use.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mspan/inuse:bytes", + Description: "Memory that is occupied by runtime mspan structures that are currently being used.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/other:bytes", + Description: "Memory that is reserved for or used to hold runtime metadata.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/os-stacks:bytes", + Description: "Stack memory allocated by the underlying operating system.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/other:bytes", + Description: "Memory used by execution trace buffers, structures for debugging the runtime, finalizer and profiler specials, and more.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/profiling/buckets:bytes", + Description: "Memory that is used by the stack trace hash map used for profiling.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/total:bytes", + 
Description: "All memory mapped by the Go runtime into the current process as read-write. Note that this does not include memory mapped by code called via cgo or via the syscall package. Sum of all metrics in /memory/classes.", + Kind: KindUint64, + }, +} // All returns a slice of containing metric descriptions for all supported metrics. func All() []Description { diff --git a/src/runtime/metrics/description_test.go b/src/runtime/metrics/description_test.go new file mode 100644 index 0000000000..e966a281a1 --- /dev/null +++ b/src/runtime/metrics/description_test.go @@ -0,0 +1,125 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics_test + +import ( + "bufio" + "os" + "path/filepath" + "regexp" + "runtime" + "runtime/metrics" + "strings" + "testing" +) + +func TestDescriptionNameFormat(t *testing.T) { + r := regexp.MustCompile("^(?P/[^:]+):(?P[^:*/]+(?:[*/][^:*/]+)*)$") + descriptions := metrics.All() + for _, desc := range descriptions { + if !r.MatchString(desc.Name) { + t.Errorf("metrics %q does not match regexp %s", desc.Name, r) + } + } +} + +func extractMetricDocs(t *testing.T) map[string]string { + if runtime.GOOS == "android" { + t.Skip("no access to Go source on android") + } + + // Get doc.go. + _, filename, _, _ := runtime.Caller(0) + filename = filepath.Join(filepath.Dir(filename), "doc.go") + + f, err := os.Open(filename) + if err != nil { + t.Fatal(err) + } + const ( + stateSearch = iota // look for list of metrics + stateNextMetric // look for next metric + stateNextDescription // build description + ) + state := stateSearch + s := bufio.NewScanner(f) + result := make(map[string]string) + var metric string + var prevMetric string + var desc strings.Builder + for s.Scan() { + line := strings.TrimSpace(s.Text()) + switch state { + case stateSearch: + if line == "Supported metrics" { + state = stateNextMetric + } + case stateNextMetric: + // Ignore empty lines until we find a non-empty + // one. This will be our metric name. + if len(line) != 0 { + prevMetric = metric + metric = line + if prevMetric > metric { + t.Errorf("metrics %s and %s are out of lexicographical order", prevMetric, metric) + } + state = stateNextDescription + } + case stateNextDescription: + if len(line) == 0 || line == `*/` { + // An empty line means we're done. + // Write down the description and look + // for a new metric. + result[metric] = desc.String() + desc.Reset() + state = stateNextMetric + } else { + // As long as we're seeing data, assume that's + // part of the description and append it. + if desc.Len() != 0 { + // Turn previous newlines into spaces. 
+ desc.WriteString(" ") + } + desc.WriteString(line) + } + } + if line == `*/` { + break + } + } + if state == stateSearch { + t.Fatalf("failed to find supported metrics docs in %s", filename) + } + return result +} + +func TestDescriptionDocs(t *testing.T) { + docs := extractMetricDocs(t) + descriptions := metrics.All() + for _, d := range descriptions { + want := d.Description + got, ok := docs[d.Name] + if !ok { + t.Errorf("no docs found for metric %s", d.Name) + continue + } + if got != want { + t.Errorf("mismatched description and docs for metric %s", d.Name) + t.Errorf("want: %q, got %q", want, got) + continue + } + } + if len(docs) > len(descriptions) { + docsLoop: + for name, _ := range docs { + for _, d := range descriptions { + if name == d.Name { + continue docsLoop + } + } + t.Errorf("stale documentation for non-existent metric: %s", name) + } + } +} diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index b48c22ba30..fb4e23a2b5 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -44,6 +44,60 @@ the documentation of the Name field of the Description struct. Supported metrics -TODO(mknyszek): List them here as they're added. + /memory/classes/heap/free:bytes + Memory that is available for allocation, and may be returned + to the underlying system. + + /memory/classes/heap/objects:bytes + Memory occupied by live objects and dead objects that have + not yet been collected. + + /memory/classes/heap/released:bytes + Memory that has been returned to the underlying system. + + /memory/classes/heap/stacks:bytes + Memory allocated from the heap that is occupied by stacks. + + /memory/classes/heap/unused:bytes + Memory that is unavailable for allocation, but cannot be + returned to the underlying system. + + /memory/classes/metadata/mcache/free:bytes + Memory that is reserved for runtime mcache structures, but + not in-use. + + /memory/classes/metadata/mcache/inuse:bytes + Memory that is occupied by runtime mcache structures that + are currently being used. + + /memory/classes/metadata/mspan/free:bytes + Memory that is reserved for runtime mspan structures, but + not in-use. + + /memory/classes/metadata/mspan/inuse:bytes + Memory that is occupied by runtime mspan structures that are + currently being used. + + /memory/classes/metadata/other:bytes + Memory that is reserved for or used to hold runtime + metadata. + + /memory/classes/os-stacks:bytes + Stack memory allocated by the underlying operating system. + + /memory/classes/other:bytes + Memory used by execution trace buffers, structures for + debugging the runtime, finalizer and profiler specials, and + more. + + /memory/classes/profiling/buckets:bytes + Memory that is used by the stack trace hash map used for + profiling. + + /memory/classes/total:bytes + All memory mapped by the Go runtime into the current process + as read-write. Note that this does not include memory mapped + by code called via cgo or via the syscall package. + Sum of all metrics in /memory/classes. */ package metrics diff --git a/src/runtime/metrics/sample.go b/src/runtime/metrics/sample.go index c7a3fc424a..b4b0979aa6 100644 --- a/src/runtime/metrics/sample.go +++ b/src/runtime/metrics/sample.go @@ -4,6 +4,11 @@ package metrics +import ( + _ "runtime" // depends on the runtime via a linkname'd function + "unsafe" +) + // Sample captures a single metric sample. type Sample struct { // Name is the name of the metric sampled. @@ -16,6 +21,9 @@ type Sample struct { Value Value } +// Implemented in the runtime. 
+func runtime_readMetrics(unsafe.Pointer, int, int) + // Read populates each Value field in the given slice of metric samples. // // Desired metrics should be present in the slice with the appropriate name. @@ -25,5 +33,5 @@ type Sample struct { // will have the value populated as KindBad to indicate that the name is // unknown. func Read(m []Sample) { - panic("unimplemented") + runtime_readMetrics(unsafe.Pointer(&m[0]), len(m), cap(m)) } diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go new file mode 100644 index 0000000000..f00aad07c4 --- /dev/null +++ b/src/runtime/metrics_test.go @@ -0,0 +1,114 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + "runtime" + "runtime/metrics" + "strings" + "testing" + "unsafe" +) + +func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) { + all := metrics.All() + samples := make([]metrics.Sample, len(all)) + descs := make(map[string]metrics.Description) + for i := range all { + samples[i].Name = all[i].Name + descs[all[i].Name] = all[i] + } + return descs, samples +} + +func TestReadMetrics(t *testing.T) { + // Tests whether readMetrics produces values aligning + // with ReadMemStats while the world is stopped. + var mstats runtime.MemStats + _, samples := prepareAllMetricsSamples() + runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples)) + + checkUint64 := func(t *testing.T, m string, got, want uint64) { + t.Helper() + if got != want { + t.Errorf("metric %q: got %d, want %d", m, got, want) + } + } + + // Check to make sure the values we read line up with other values we read. + for i := range samples { + switch name := samples[i].Name; name { + case "/memory/classes/heap/free:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased) + case "/memory/classes/heap/released:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased) + case "/memory/classes/heap/objects:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc) + case "/memory/classes/heap/unused:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc) + case "/memory/classes/heap/stacks:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse) + case "/memory/classes/metadata/mcache/free:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse) + case "/memory/classes/metadata/mcache/inuse:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse) + case "/memory/classes/metadata/mspan/free:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse) + case "/memory/classes/metadata/mspan/inuse:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse) + case "/memory/classes/metadata/other:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys) + case "/memory/classes/os-stacks:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse) + case "/memory/classes/other:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys) + case "/memory/classes/profiling/buckets:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys) + case "/memory/classes/total:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys) + } + } +} + +func 
TestReadMetricsConsistency(t *testing.T) { + // Tests whether readMetrics produces consistent, sensible values. + // The values are read concurrently with the runtime doing other + // things (e.g. allocating) so what we read can't reasonably be compared + // to runtime values. + + // Read all the supported metrics through the metrics package. + descs, samples := prepareAllMetricsSamples() + metrics.Read(samples) + + // Check to make sure the values we read make sense. + var totalVirtual struct { + got, want uint64 + } + for i := range samples { + kind := samples[i].Value.Kind() + if want := descs[samples[i].Name].Kind; kind != want { + t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want) + continue + } + if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") { + v := samples[i].Value.Uint64() + totalVirtual.want += v + + // None of these stats should ever get this big. + // If they do, there's probably overflow involved, + // usually due to bad accounting. + if int64(v) < 0 { + t.Errorf("%q has high/negative value: %d", samples[i].Name, v) + } + } + switch samples[i].Name { + case "/memory/classes/total:bytes": + totalVirtual.got = samples[i].Value.Uint64() + } + } + if totalVirtual.got != totalVirtual.want { + t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want) + } +} diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index a8eca85fe6..512a06cffa 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -882,7 +882,8 @@ func (m *consistentHeapStats) unsafeClear() { // heapStatsDelta, the resulting values should be complete and // valid statistic values. // -// Not safe to call concurrently. +// Not safe to call concurrently. The world must be stopped +// or metricsSema must be held. func (m *consistentHeapStats) read(out *heapStatsDelta) { // Getting preempted after this point is not safe because // we read allp. We need to make sure a STW can't happen -- cgit v1.3 From 36c5edd8d9e6c13af26733e5c820eae0598203fe Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 6 Aug 2020 20:36:49 +0000 Subject: runtime: add timeHistogram type This change adds a concurrent HDR time histogram to the runtime with tests. It also adds a function to generate boundaries for use by the metrics package. For #37112.
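As a worked illustration of the bucketing scheme this commit describes, here is a standalone sketch (hypothetical names, not code from the patch): the super-bucket index comes from the position of a duration's highest set bit, and the sub-bucket index from the next four bits. Overflow handling is omitted for brevity.

package main

import (
	"fmt"
	"math/bits"
)

// Constants mirroring the patch: 4 sub-bucket bits give 16 linear
// sub-buckets inside each power-of-two super-bucket.
const (
	subBucketBits = 4
	numSubBuckets = 1 << subBucketBits
)

// bucketOf computes the (super-bucket, sub-bucket) indices for a
// duration in nanoseconds, following the scheme described above.
func bucketOf(d int64) (super, sub uint) {
	if d < numSubBuckets {
		// Small values are indexed directly in super-bucket 0.
		return 0, uint(d)
	}
	// The highest set bit selects the super-bucket...
	super = uint(bits.Len64(uint64(d))) - subBucketBits
	// ...and the next 4 bits select the linear sub-bucket.
	sub = uint((d >> (super - 1)) % numSubBuckets)
	return super, sub
}

func main() {
	// 6 (0b00110), 22 (0b10110), and 34 (0b100010): the same three
	// examples worked through in the histogram.go comment below.
	for _, d := range []int64{6, 22, 34} {
		s, b := bucketOf(d)
		fmt.Printf("%#b -> super-bucket %d, sub-bucket %d\n", d, s, b)
	}
}

Running it yields (0, 6), (1, 6), and (2, 1), matching the worked examples in the histogram.go comment.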
Change-Id: Ifbef8ddce8e3a965a0dcd58ccd4915c282ae2098 Reviewed-on: https://go-review.googlesource.com/c/go/+/247046 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 24 +++++++ src/runtime/histogram.go | 148 ++++++++++++++++++++++++++++++++++++++++++ src/runtime/histogram_test.go | 58 +++++++++++++++++ src/runtime/metrics.go | 2 + 4 files changed, 232 insertions(+) create mode 100644 src/runtime/histogram.go create mode 100644 src/runtime/histogram_test.go (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index d043fe3ee5..4ca0420d2a 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1141,3 +1141,27 @@ func MSpanCountAlloc(ms *MSpan, bits []byte) int { s.gcmarkBits = nil return result } + +const ( + TimeHistSubBucketBits = timeHistSubBucketBits + TimeHistNumSubBuckets = timeHistNumSubBuckets + TimeHistNumSuperBuckets = timeHistNumSuperBuckets +) + +type TimeHistogram timeHistogram + +// Count returns the count for the given bucket, subBucket indices. +// Returns true if the bucket was valid, otherwise returns the count +// for the overflow bucket and false. +func (th *TimeHistogram) Count(bucket, subBucket uint) (uint64, bool) { + t := (*timeHistogram)(th) + i := bucket*TimeHistNumSubBuckets + subBucket + if i >= uint(len(t.counts)) { + return t.overflow, false + } + return t.counts[i], true +} + +func (th *TimeHistogram) Record(duration int64) { + (*timeHistogram)(th).record(duration) +} diff --git a/src/runtime/histogram.go b/src/runtime/histogram.go new file mode 100644 index 0000000000..4020969eb9 --- /dev/null +++ b/src/runtime/histogram.go @@ -0,0 +1,148 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" +) + +const ( + // For the time histogram type, we use an HDR histogram. + // Values are placed in super-buckets based solely on the most + // significant set bit. Thus, super-buckets are power-of-2 sized. + // Values are then placed into sub-buckets based on the value of + // the next timeHistSubBucketBits most significant bits. Thus, + // sub-buckets are linear within a super-bucket. + // + // Therefore, the number of sub-buckets (timeHistNumSubBuckets) + // defines the error. This error may be computed as + // 1/timeHistNumSubBuckets*100%. For example, for 16 sub-buckets + // per super-bucket the error is approximately 6%. + // + // The number of super-buckets (timeHistNumSuperBuckets), on the + // other hand, defines the range. To reserve room for sub-buckets, + // bit timeHistSubBucketBits is the first bit considered for + // super-buckets, so super-bucket indices are adjusted accordingly. + // + // As an example, consider 45 super-buckets with 16 sub-buckets. + // + // 00110 + // ^---- + // │ ^ + // │ └---- Lowest 4 bits -> sub-bucket 6 + // └------- Bit 4 unset -> super-bucket 0 + // + // 10110 + // ^---- + // │ ^ + // │ └---- Next 4 bits -> sub-bucket 6 + // └------- Bit 4 set -> super-bucket 1 + // 100010 + // ^----^ + // │ ^ └-- Lower bits ignored + // │ └---- Next 4 bits -> sub-bucket 1 + // └------- Bit 5 set -> super-bucket 2 + // + // Following this pattern, bucket 45 will have the bit 48 set.
We don't + // have any buckets for higher values, so the highest sub-bucket will + // contain values of 2^48-1 nanoseconds or approx. 3 days. This range is + // more than enough to handle durations produced by the runtime. + timeHistSubBucketBits = 4 + timeHistNumSubBuckets = 1 << timeHistSubBucketBits + timeHistNumSuperBuckets = 45 + timeHistTotalBuckets = timeHistNumSuperBuckets*timeHistNumSubBuckets + 1 +) + +// timeHistogram represents a distribution of durations in +// nanoseconds. +// +// The accuracy and range of the histogram is defined by the +// timeHistSubBucketBits and timeHistNumSuperBuckets constants. +// +// It is an HDR histogram with exponentially-distributed +// buckets and linearly distributed sub-buckets. +// +// Counts in the histogram are updated atomically, so it is safe +// for concurrent use. It is also safe to read all the values +// atomically. +type timeHistogram struct { + counts [timeHistNumSuperBuckets * timeHistNumSubBuckets]uint64 + overflow uint64 +} + +// record adds the given duration to the distribution. +// +// Although the duration is an int64 to facilitate ease-of-use +// with e.g. nanotime, the duration must be non-negative. +func (h *timeHistogram) record(duration int64) { + if duration < 0 { + throw("timeHistogram encountered negative duration") + } + // The index of the exponential bucket is just the index + // of the highest set bit adjusted for how many bits we + // use for the subbucket. Note that it's timeHistSubBucketBits-1 + // because we use the 0th bucket to hold values < timeHistNumSubBuckets. + var superBucket, subBucket uint + if duration >= timeHistNumSubBuckets { + // At this point, we know the duration value will always be + // at least timeHistSubBucketBits long. + superBucket = uint(sys.Len64(uint64(duration))) - timeHistSubBucketBits + if superBucket*timeHistNumSubBuckets >= uint(len(h.counts)) { + // The bucket index we got is larger than what we support, so + // add into the special overflow bucket. + atomic.Xadd64(&h.overflow, 1) + return + } + // The linear subbucket index is just the timeHistSubBucketBits + // bits after the top bit. To extract that value, shift down + // the duration such that we leave the top bit and the next bits + // intact, then extract the index. + subBucket = uint((duration >> (superBucket - 1)) % timeHistNumSubBuckets) + } else { + subBucket = uint(duration) + } + atomic.Xadd64(&h.counts[superBucket*timeHistNumSubBuckets+subBucket], 1) +} + +// timeHistogramMetricsBuckets generates a slice of boundaries for +// the timeHistogram. These boundaries are represented in seconds, +// not nanoseconds like the timeHistogram represents durations. +func timeHistogramMetricsBuckets() []float64 { + b := make([]float64, timeHistTotalBuckets-1) + for i := 0; i < timeHistNumSuperBuckets; i++ { + // The (inclusive) minimum for the first bucket is 0. + superBucketMin := uint64(0) + if i > 0 { + // The minimum for the second bucket will be + // 1 << timeHistSubBucketBits, indicating that all + // sub-buckets are represented by the next timeHistSubBucketBits + // bits. + // Thereafter, we shift up by 1 each time, so we can represent + // this pattern as (i-1)+timeHistSubBucketBits. + superBucketMin = uint64(1) << uint(i-1+timeHistSubBucketBits) + } + // subBucketShift is the amount that we need to shift the sub-bucket + // index to combine it with the bucketMin.
+ subBucketShift := uint(0) + if i > 1 { + // The first two buckets are exact with respect to integers, + // so we'll never have to shift the sub-bucket index. Thereafter, + // we shift up by 1 with each subsequent bucket. + subBucketShift = uint(i - 2) + } + for j := 0; j < timeHistNumSubBuckets; j++ { + // j is the sub-bucket index. By shifting the index into position to + // combine with the bucket minimum, we obtain the minimum value for that + // sub-bucket. + subBucketMin := superBucketMin + (uint64(j) << subBucketShift) + + // Convert the subBucketMin which is in nanoseconds to a float64 seconds value. + // These values will all be exactly representable by a float64. + b[i*timeHistNumSubBuckets+j] = float64(subBucketMin) / 1e9 + } + } + return b +} diff --git a/src/runtime/histogram_test.go b/src/runtime/histogram_test.go new file mode 100644 index 0000000000..5f5b28f784 --- /dev/null +++ b/src/runtime/histogram_test.go @@ -0,0 +1,58 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + . "runtime" + "testing" +) + +var dummyTimeHistogram TimeHistogram + +func TestTimeHistogram(t *testing.T) { + // We need to use a global dummy because this + // could get stack-allocated with a non-8-byte alignment. + // The result of this bad alignment is a segfault on + // 32-bit platforms when calling Record. + h := &dummyTimeHistogram + + // Record exactly one sample in each bucket. + for i := 0; i < TimeHistNumSuperBuckets; i++ { + var base int64 + if i > 0 { + base = int64(1) << (i + TimeHistSubBucketBits - 1) + } + for j := 0; j < TimeHistNumSubBuckets; j++ { + v := int64(j) + if i > 0 { + v <<= i - 1 + } + h.Record(base + v) + } + } + // Hit the overflow bucket. + h.Record(int64(^uint64(0) >> 1)) + + // Check to make sure there's exactly one count in each + // bucket. + for i := uint(0); i < TimeHistNumSuperBuckets; i++ { + for j := uint(0); j < TimeHistNumSubBuckets; j++ { + c, ok := h.Count(i, j) + if !ok { + t.Errorf("hit overflow bucket unexpectedly: (%d, %d)", i, j) + } else if c != 1 { + t.Errorf("bucket (%d, %d) has count that is not 1: %d", i, j, c) + } + } + } + c, ok := h.Count(TimeHistNumSuperBuckets, 0) + if ok { + t.Errorf("expected to hit overflow bucket: (%d, %d)", TimeHistNumSuperBuckets, 0) + } + if c != 1 { + t.Errorf("overflow bucket has count that is not 1: %d", c) + } + dummyTimeHistogram = TimeHistogram{} +} diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 32d8ab461c..2be38ccaaa 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -20,6 +20,7 @@ var ( metrics map[string]metricData sizeClassBuckets []float64 + timeHistBuckets []float64 ) type metricData struct { @@ -44,6 +45,7 @@ func initMetrics() { for i := range sizeClassBuckets { sizeClassBuckets[i] = float64(class_to_size[i]) } + timeHistBuckets = timeHistogramMetricsBuckets() metrics = map[string]metricData{ "/gc/cycles/automatic:gc-cycles": { deps: makeStatDepSet(sysStatsDep), -- cgit v1.3 From 9393b5bae5944acebed3ab6f995926b7de3ce429 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Fri, 21 Aug 2020 11:59:55 -0400 Subject: runtime: add heap lock assertions Some functions that required holding the heap lock _or_ world stop have been simplified to simply requiring the heap lock. This is conceptually simpler and taking the heap lock during world stop is guaranteed to not contend. 
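The contract being enforced can be shown with a toy sketch (hypothetical names; the runtime's assertLockHeld added below is tied to its lock-ranking machinery and compiles to a no-op in normal builds, so it looks nothing like this):

package main

import (
	"sync"
	"sync/atomic"
)

// checkedLock is a stand-in for the runtime's mutex, extended with
// just enough state to assert "the caller holds this lock".
type checkedLock struct {
	mu   sync.Mutex
	held atomic.Bool
}

func (l *checkedLock) lock()   { l.mu.Lock(); l.held.Store(true) }
func (l *checkedLock) unlock() { l.held.Store(false); l.mu.Unlock() }

// assertHeld turns a violated locking contract into an immediate,
// debuggable crash instead of silent data corruption.
func (l *checkedLock) assertHeld() {
	if !l.held.Load() {
		panic("lock not held")
	}
}

// grow plays the role of a function documented as "h.lock must be
// held", like (*mheap).grow in the diffs below.
func grow(heapLock *checkedLock) {
	heapLock.assertHeld()
	// ... grow the heap ...
}

func main() {
	var l checkedLock
	l.lock()
	grow(&l) // fine: the lock is held
	l.unlock()
}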
This was only done on functions already called on the systemstack to avoid too many extra systemstack calls in GC. Updates #40677 Change-Id: I15aa1dadcdd1a81aac3d2a9ecad6e7d0377befdc Reviewed-on: https://go-review.googlesource.com/c/go/+/250262 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Austin Clements Trust: Michael Pratt --- src/runtime/export_test.go | 61 +++++++++++++++++++++++++++++++++++++++++----- src/runtime/malloc.go | 2 ++ src/runtime/mgc.go | 4 +++ src/runtime/mgcscavenge.go | 18 ++++++++++++++ src/runtime/mheap.go | 29 +++++++++++++++++----- src/runtime/mpagealloc.go | 22 +++++++++++++++++ src/runtime/mpagecache.go | 14 ++++++++++- src/runtime/proc.go | 2 ++ 8 files changed, 139 insertions(+), 13 deletions(-) (limited to 'src/runtime/export_test.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 4ca0420d2a..44551dcaf1 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -743,7 +743,16 @@ func (c *PageCache) Alloc(npages uintptr) (uintptr, uintptr) { return (*pageCache)(c).alloc(npages) } func (c *PageCache) Flush(s *PageAlloc) { - (*pageCache)(c).flush((*pageAlloc)(s)) + cp := (*pageCache)(c) + sp := (*pageAlloc)(s) + + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(sp.mheapLock) + cp.flush(sp) + unlock(sp.mheapLock) + }) } // Expose chunk index type. @@ -754,13 +763,41 @@ type ChunkIdx chunkIdx type PageAlloc pageAlloc func (p *PageAlloc) Alloc(npages uintptr) (uintptr, uintptr) { - return (*pageAlloc)(p).alloc(npages) + pp := (*pageAlloc)(p) + + var addr, scav uintptr + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(pp.mheapLock) + addr, scav = pp.alloc(npages) + unlock(pp.mheapLock) + }) + return addr, scav } func (p *PageAlloc) AllocToCache() PageCache { - return PageCache((*pageAlloc)(p).allocToCache()) + pp := (*pageAlloc)(p) + + var c PageCache + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(pp.mheapLock) + c = PageCache(pp.allocToCache()) + unlock(pp.mheapLock) + }) + return c } func (p *PageAlloc) Free(base, npages uintptr) { - (*pageAlloc)(p).free(base, npages) + pp := (*pageAlloc)(p) + + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(pp.mheapLock) + pp.free(base, npages) + unlock(pp.mheapLock) + }) } func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) { return ChunkIdx((*pageAlloc)(p).start), ChunkIdx((*pageAlloc)(p).end) @@ -768,6 +805,8 @@ func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) { func (p *PageAlloc) Scavenge(nbytes uintptr, mayUnlock bool) (r uintptr) { pp := (*pageAlloc)(p) systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. lock(pp.mheapLock) r = pp.scavenge(nbytes, mayUnlock) unlock(pp.mheapLock) @@ -926,7 +965,11 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { addr := chunkBase(chunkIdx(i)) // Mark the chunk's existence in the pageAlloc. - p.grow(addr, pallocChunkBytes) + systemstack(func() { + lock(p.mheapLock) + p.grow(addr, pallocChunkBytes) + unlock(p.mheapLock) + }) // Initialize the bitmap and update pageAlloc metadata. 
chunk := p.chunkOf(chunkIndex(addr)) @@ -926,7 +965,11 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { } // Update heap metadata for the allocRange calls above. - p.update(addr, pallocChunkPages, false, false) + systemstack(func() { + lock(p.mheapLock) + p.update(addr, pallocChunkPages, false, false) + unlock(p.mheapLock) + }) } + systemstack(func() { lock(p.mheapLock) p.scavengeStartGen() unlock(p.mheapLock) }) + return (*PageAlloc)(p) } diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 0563f49d17..4b798d129c 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -627,6 +627,8 @@ func mallocinit() { // // h must be locked. func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { + assertLockHeld(&h.lock) + n = alignUp(n, heapArenaBytes) // First, try the arena pre-reservation. diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index fb3c149942..185d3201ca 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -821,6 +821,8 @@ func pollFractionalWorkerExit() bool { // // mheap_.lock must be held or the world must be stopped. func gcSetTriggerRatio(triggerRatio float64) { + assertWorldStoppedOrLockHeld(&mheap_.lock) + // Compute the next GC goal, which is when the allocated heap // has grown by GOGC/100 over the heap marked by the last // cycle. @@ -960,6 +962,8 @@ func gcSetTriggerRatio(triggerRatio float64) { // // mheap_.lock must be held or the world must be stopped. func gcEffectiveGrowthRatio() float64 { + assertWorldStoppedOrLockHeld(&mheap_.lock) + egogc := float64(atomic.Load64(&memstats.next_gc)-memstats.heap_marked) / float64(memstats.heap_marked) if egogc < 0 { // Shouldn't happen, but just in case. diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 5843ada981..a242577bd9 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -397,6 +397,8 @@ func bgscavenge(c chan int) { // //go:systemstack func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr { + assertLockHeld(p.mheapLock) + var ( addrs addrRange gen uint32 @@ -446,6 +448,8 @@ func printScavTrace(gen uint32, released uintptr, forced bool) { // //go:systemstack func (p *pageAlloc) scavengeStartGen() { + assertLockHeld(p.mheapLock) + if debug.scavtrace > 0 { printScavTrace(p.scav.gen, p.scav.released, false) } @@ -495,6 +499,8 @@ func (p *pageAlloc) scavengeStartGen() { // //go:systemstack func (p *pageAlloc) scavengeReserve() (addrRange, uint32) { + assertLockHeld(p.mheapLock) + // Start by reserving the minimum. r := p.scav.inUse.removeLast(p.scav.reservationBytes) @@ -525,6 +531,8 @@ func (p *pageAlloc) scavengeReserve() (addrRange, uint32) { // //go:systemstack func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) { + assertLockHeld(p.mheapLock) + if r.size() == 0 || gen != p.scav.gen { return } @@ -552,6 +560,8 @@ func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) { // //go:systemstack func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) { + assertLockHeld(p.mheapLock) + // Defensively check if we've received an empty address range. // If so, just return. if work.size() == 0 { @@ -610,6 +620,8 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui // If we found something, scavenge it and return! if npages != 0 { work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)} + + assertLockHeld(p.mheapLock) // Must be locked on return.
return uintptr(npages) * pageSize, work } } @@ -674,12 +686,16 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages) if npages > 0 { work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)} + + assertLockHeld(p.mheapLock) // Must be locked on return. return uintptr(npages) * pageSize, work } // We were fooled, so let's continue from where we left off. work.limit = offAddr{chunkBase(candidateChunkIdx)} } + + assertLockHeld(p.mheapLock) // Must be locked on return. return 0, work } @@ -692,6 +708,8 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui // // p.mheapLock must be held. func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr { + assertLockHeld(p.mheapLock) + p.chunkOf(ci).scavenged.setRange(base, npages) // Compute the full address for the start of the range. diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 14a73c0491..66a59cb999 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -483,10 +483,15 @@ func (s *mspan) layout() (size, n, total uintptr) { // indirect call from the fixalloc initializer, the compiler can't see // this. // +// The heap lock must be held. +// //go:nowritebarrierrec func recordspan(vh unsafe.Pointer, p unsafe.Pointer) { h := (*mheap)(vh) s := (*mspan)(p) + + assertLockHeld(&h.lock) + if len(h.allspans) >= cap(h.allspans) { n := 64 * 1024 / sys.PtrSize if n < cap(h.allspans)*3/2 { @@ -721,7 +726,7 @@ func (h *mheap) init() { // // reclaim implements the page-reclaimer half of the sweeper. // -// h must NOT be locked. +// h.lock must NOT be held. func (h *mheap) reclaim(npage uintptr) { // TODO(austin): Half of the time spent freeing spans is in // locking/unlocking the heap (even with low contention). We @@ -804,6 +809,8 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr { // In particular, if a span were freed and merged concurrently // with this probing heapArena.spans, it would be possible to // observe arbitrary, stale span pointers. + assertLockHeld(&h.lock) + n0 := n var nFreed uintptr sg := h.sweepgen @@ -858,6 +865,8 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr { traceGCSweepSpan((n0 - nFreed) * pageSize) lock(&h.lock) } + + assertLockHeld(&h.lock) // Must be locked on return. return nFreed } @@ -1011,7 +1020,7 @@ func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) { // tryAllocMSpan attempts to allocate an mspan object from // the P-local cache, but may fail. // -// h need not be locked. +// h.lock need not be held. // // This caller must ensure that its P won't change underneath // it during this function. Currently to ensure that we enforce @@ -1035,7 +1044,7 @@ func (h *mheap) tryAllocMSpan() *mspan { // allocMSpanLocked allocates an mspan object. // -// h must be locked. +// h.lock must be held. // // allocMSpanLocked must be called on the system stack because // its caller holds the heap lock. See mheap for details. @@ -1044,6 +1053,8 @@ func (h *mheap) tryAllocMSpan() *mspan { // //go:systemstack func (h *mheap) allocMSpanLocked() *mspan { + assertLockHeld(&h.lock) + pp := getg().m.p.ptr() if pp == nil { // We don't have a p so just do the normal thing. @@ -1065,7 +1076,7 @@ func (h *mheap) allocMSpanLocked() *mspan { // freeMSpanLocked frees an mspan object. // -// h must be locked. +// h.lock must be held.
// // freeMSpanLocked must be called on the system stack because // its caller holds the heap lock. See mheap for details. @@ -1074,6 +1085,8 @@ func (h *mheap) allocMSpanLocked() *mspan { // //go:systemstack func (h *mheap) freeMSpanLocked(s *mspan) { + assertLockHeld(&h.lock) + pp := getg().m.p.ptr() // First try to free the mspan directly to the cache. if pp != nil && pp.mspancache.len < len(pp.mspancache.buf) { @@ -1097,7 +1110,7 @@ func (h *mheap) freeMSpanLocked(s *mspan) { // // The returned span is fully initialized. // -// h must not be locked. +// h.lock must not be held. // // allocSpan must be called on the system stack both because it acquires // the heap lock and because it must block GC transitions. @@ -1281,8 +1294,10 @@ HaveSpan: // Try to add at least npage pages of memory to the heap, // returning whether it worked. // -// h must be locked. +// h.lock must be held. func (h *mheap) grow(npage uintptr) bool { + assertLockHeld(&h.lock) + // We must grow the heap in whole palloc chunks. ask := alignUp(npage, pallocChunkPages) * pageSize @@ -1391,6 +1406,8 @@ func (h *mheap) freeManual(s *mspan, typ spanAllocType) { } func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { + assertLockHeld(&h.lock) + switch s.state.get() { case mSpanManual: if s.allocCount != 0 { diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index 2af1c97e0b..dac1f39969 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -349,6 +349,8 @@ func (p *pageAlloc) chunkOf(ci chunkIdx) *pallocData { // // p.mheapLock must be held. func (p *pageAlloc) grow(base, size uintptr) { + assertLockHeld(p.mheapLock) + // Round up to chunks, since we can't deal with increments smaller // than chunks. Also, sysGrow expects aligned values. limit := alignUp(base+size, pallocChunkBytes) @@ -413,6 +415,8 @@ func (p *pageAlloc) grow(base, size uintptr) { // // p.mheapLock must be held. func (p *pageAlloc) update(base, npages uintptr, contig, alloc bool) { + assertLockHeld(p.mheapLock) + // base, limit, start, and end are inclusive. limit := base + npages*pageSize - 1 sc, ec := chunkIndex(base), chunkIndex(limit) @@ -499,6 +503,8 @@ func (p *pageAlloc) update(base, npages uintptr, contig, alloc bool) { // // p.mheapLock must be held. func (p *pageAlloc) allocRange(base, npages uintptr) uintptr { + assertLockHeld(p.mheapLock) + limit := base + npages*pageSize - 1 sc, ec := chunkIndex(base), chunkIndex(limit) si, ei := chunkPageIndex(base), chunkPageIndex(limit) @@ -534,6 +540,8 @@ func (p *pageAlloc) allocRange(base, npages uintptr) uintptr { // // p.mheapLock must be held. func (p *pageAlloc) findMappedAddr(addr offAddr) offAddr { + assertLockHeld(p.mheapLock) + // If we're not in a test, validate first by checking mheap_.arenas. // This is a fast path which is only safe to use outside of testing. ai := arenaIndex(addr.addr()) @@ -568,6 +576,8 @@ func (p *pageAlloc) findMappedAddr(addr offAddr) offAddr { // // p.mheapLock must be held. func (p *pageAlloc) find(npages uintptr) (uintptr, offAddr) { + assertLockHeld(p.mheapLock) + // Search algorithm. // // This algorithm walks each level l of the radix tree from the root level @@ -786,7 +796,13 @@ nextLevel: // should be ignored. // // p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. 
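+// (go:systemstack is checked dynamically: such functions get a special prologue that throws if they are entered on a user goroutine stack.)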
+// +//go:systemstack func (p *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) { + assertLockHeld(p.mheapLock) + // If the searchAddr refers to a region which has a higher address than // any known chunk, then we know we're out of memory. if chunkIndex(p.searchAddr.addr()) >= p.end { @@ -841,7 +857,13 @@ Found: // free returns npages worth of memory starting at base back to the page heap. // // p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. +// +//go:systemstack func (p *pageAlloc) free(base, npages uintptr) { + assertLockHeld(p.mheapLock) + // If we're freeing pages below the p.searchAddr, update searchAddr. if b := (offAddr{base}); b.lessThan(p.searchAddr) { p.searchAddr = b diff --git a/src/runtime/mpagecache.go b/src/runtime/mpagecache.go index 5f76501a1c..4b5c66d8d6 100644 --- a/src/runtime/mpagecache.go +++ b/src/runtime/mpagecache.go @@ -71,8 +71,14 @@ func (c *pageCache) allocN(npages uintptr) (uintptr, uintptr) { // into s. Then, it clears the cache, such that empty returns // true. // -// p.mheapLock must be held or the world must be stopped. +// p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. +// +//go:systemstack func (c *pageCache) flush(p *pageAlloc) { + assertLockHeld(p.mheapLock) + if c.empty() { return } @@ -103,7 +109,13 @@ func (c *pageCache) flush(p *pageAlloc) { // chunk. // // p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. +// +//go:systemstack func (p *pageAlloc) allocToCache() pageCache { + assertLockHeld(p.mheapLock) + // If the searchAddr refers to a region which has a higher address than // any known chunk, then we know we're out of memory. if chunkIndex(p.searchAddr.addr()) >= p.end { diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 82284e6cd6..ced27ceb3a 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -4603,7 +4603,9 @@ func (pp *p) destroy() { mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i])) } pp.mspancache.len = 0 + lock(&mheap_.lock) pp.pcache.flush(&mheap_.pages) + unlock(&mheap_.lock) }) freemcache(pp.mcache) pp.mcache = nil -- cgit v1.3
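Taken together, the patches above make the runtime/metrics API usable end to end. A minimal consumer might look like this (ordinary user code, not part of any patch in this log):

package main

import (
	"fmt"
	"runtime/metrics"
)

func main() {
	// Two of the metrics documented in doc.go above; the other
	// /memory/classes/... names work the same way.
	samples := []metrics.Sample{
		{Name: "/memory/classes/heap/objects:bytes"},
		{Name: "/memory/classes/total:bytes"},
	}
	metrics.Read(samples)
	for _, s := range samples {
		// Read reports unknown names as KindBad rather than failing.
		if s.Value.Kind() == metrics.KindBad {
			fmt.Printf("%s: unsupported\n", s.Name)
			continue
		}
		fmt.Printf("%s: %d bytes\n", s.Name, s.Value.Uint64())
	}
}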