| author | Michael Anthony Knyszek <mknyszek@google.com> | 2024-03-15 22:18:06 +0000 |
|---|---|---|
| committer | Michael Knyszek <mknyszek@google.com> | 2024-04-10 18:52:49 +0000 |
| commit | 5bba5b256ce287736da60372c5cf634395d7b1a3 (patch) | |
| tree | 8bbb410d62048d6a5d472d2f37c8a17ddb14450e /src/runtime/trace2stack.go | |
| parent | 73981695a2518b6eae7f8ffe74d224691c60d433 (diff) | |
| download | go-5bba5b256ce287736da60372c5cf634395d7b1a3.tar.xz | |
runtime: rewrite traceMap to scale better
The existing implementation of traceMap is a hash map with a fixed-size
bucket table, which scales poorly with the number of elements added to
the map. Once the map holds more than a few thousand elements, it tends
to fall over.
Furthermore, cleaning up the trace map is currently non-preemptible,
without a very good reason.
This change replaces the traceMap implementation with a simple
append-only concurrent hash-trie. The data structure is incredibly
simple and does not suffer at all from the same scaling issues.
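
For readers who haven't seen the structure before, here is a minimal, stand-alone sketch of an append-only concurrent hash-trie in Go. It illustrates the general technique only and is not the runtime's traceMap: the names (hashTrie, node, put) are hypothetical, keys are reduced to their 64-bit hashes, and each trie level consumes 2 bits of the hash. Because nodes are only ever added, never moved or removed, inserters can race with a single CompareAndSwap and readers never need a lock.

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// node is one trie entry. The children slots are atomic pointers so that
// concurrent inserters can publish new nodes without a lock. (Hypothetical
// type for illustration; not the runtime's traceMapNode.)
type node struct {
	hash     uint64
	id       uint64
	children [4]atomic.Pointer[node] // indexed by 2 bits of the hash per level
}

type hashTrie struct {
	root   atomic.Pointer[node]
	nextID atomic.Uint64
}

// put returns a stable id for hash, inserting a node on first use.
// The structure is append-only, so growth never requires rehashing and
// the map cannot degrade the way a fixed-size bucket table does.
func (t *hashTrie) put(hash uint64) uint64 {
	var fresh *node
	slot := &t.root
	for depth := 0; ; depth++ {
		n := slot.Load()
		if n == nil {
			// Empty slot: race to install a new node.
			if fresh == nil {
				fresh = &node{hash: hash, id: t.nextID.Add(1)}
			}
			if slot.CompareAndSwap(nil, fresh) {
				return fresh.id
			}
			n = slot.Load() // lost the race; examine the winner instead
		}
		if n.hash == hash {
			return n.id
		}
		// Walk down, consuming 2 more bits of the hash at each level.
		slot = &n.children[(hash>>(2*depth))&3]
	}
}

func main() {
	var trie hashTrie
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for h := uint64(1); h <= 1000; h++ {
				trie.put(h * 0x9e3779b97f4a7c15) // cheap hash mixing
			}
		}()
	}
	wg.Wait()
	// Re-inserting an existing key returns the id chosen by the first
	// successful insert, regardless of which goroutine performed it.
	h := uint64(42)
	fmt.Println(trie.put(h*0x9e3779b97f4a7c15) == trie.put(h*0x9e3779b97f4a7c15)) // true
}
```

Note that a goroutine that loses the CompareAndSwap may waste an id it already reserved; in this sketch that is harmless because ids only need to be unique, not dense.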
Because the traceMap no longer has a lock, and the traceRegionAlloc it
embeds is not thread-safe, we have to push that lock down. While we're
here, this change also makes the fast path for the traceRegionAlloc
lock-free. This may not be inherently faster due to contention on the
atomic add, but it creates an easy path to sharding the main allocation
buffer to reduce contention in the future. (We might want to also
consider a fully thread-local allocator that covers both string and
stack tables. The only reason a thread-local allocator isn't feasible
right now is that each of these has its own region, but we could
certainly group them all together.)
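
As a rough illustration of what a lock-free fast path for region allocation looks like, here is a minimal sketch of a bump allocator whose only fast-path synchronization is an atomic add on the offset. This is not the runtime's traceRegionAlloc: the names (regionAlloc, alloc) and the single fixed-size backing buffer are assumptions for the example, and a real implementation would fall back to a locked slow path that chains additional blocks instead of returning nil.

```go
package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

const regionSize = 1 << 20 // 1 MiB backing buffer (arbitrary for the sketch)

// regionAlloc hands out bytes from buf by bumping off with an atomic add.
// The atomic add is the contention point mentioned above: correctness needs
// no lock, but every allocation still serializes on that one counter.
type regionAlloc struct {
	off atomic.Uint64
	buf [regionSize]byte
}

// alloc reserves n bytes (rounded up to 8-byte alignment) and returns a
// pointer to them, or nil if the region is exhausted. It is safe to call
// from multiple goroutines concurrently.
func (r *regionAlloc) alloc(n uint64) unsafe.Pointer {
	n = (n + 7) &^ 7 // keep subsequent allocations 8-byte aligned
	if n == 0 || n > regionSize {
		return nil
	}
	end := r.off.Add(n)
	if end > regionSize {
		// Fast path failed. A real allocator would take a lock here and
		// chain a fresh block; this sketch simply reports exhaustion.
		return nil
	}
	return unsafe.Pointer(&r.buf[end-n])
}

func main() {
	r := new(regionAlloc) // heap-allocate: the backing buffer is 1 MiB
	p := r.alloc(48)
	fmt.Println("got 48 bytes:", p != nil)
}
```

Sharding the buffer, as suggested above, would split off and buf into per-shard copies so that the atomic add is no longer a single global contention point.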
Change-Id: I8c06d42825c326061a1b8569e322afc4bc2a513a
Reviewed-on: https://go-review.googlesource.com/c/go/+/570035
Reviewed-by: Carlos Amedee <carlos@golang.org>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
TryBot-Bypass: Michael Knyszek <mknyszek@google.com>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src/runtime/trace2stack.go')
| -rw-r--r-- | src/runtime/trace2stack.go | 91 |
1 files changed, 42 insertions, 49 deletions
diff --git a/src/runtime/trace2stack.go b/src/runtime/trace2stack.go
index 7d698c89d3..dfccaabd62 100644
--- a/src/runtime/trace2stack.go
+++ b/src/runtime/trace2stack.go
@@ -140,62 +140,55 @@ func (t *traceStackTable) put(pcs []uintptr) uint64 {
 // can guarantee that there are no more writers to the table.
 func (t *traceStackTable) dump(gen uintptr) {
 	w := unsafeTraceWriter(gen, nil)
+	if root := (*traceMapNode)(t.tab.root.Load()); root != nil {
+		w = dumpStacksRec(root, w)
+	}
+	w.flush().end()
+	t.tab.reset()
+}
-
-	// Iterate over the table.
-	//
-	// Do not acquire t.tab.lock. There's a conceptual lock cycle between acquiring this lock
-	// here and allocation-related locks. Specifically, this lock may be acquired when an event
-	// is emitted in allocation paths. Simultaneously, we might allocate here with the lock held,
-	// creating a cycle. In practice, this cycle is never exercised. Because the table is only
-	// dumped once there are no more writers, it's not possible for the cycle to occur. However
-	// the lockrank mode is not sophisticated enough to identify this, and if it's not possible
-	// for that cycle to happen, then it's also not possible for this to race with writers to
-	// the table.
-	for i := range t.tab.tab {
-		stk := t.tab.bucket(i)
-		for ; stk != nil; stk = stk.next() {
-			stack := unsafe.Slice((*uintptr)(unsafe.Pointer(&stk.data[0])), uintptr(len(stk.data))/unsafe.Sizeof(uintptr(0)))
+
+func dumpStacksRec(node *traceMapNode, w traceWriter) traceWriter {
+	stack := unsafe.Slice((*uintptr)(unsafe.Pointer(&node.data[0])), uintptr(len(node.data))/unsafe.Sizeof(uintptr(0)))
 
-			// N.B. This might allocate, but that's OK because we're not writing to the M's buffer,
-			// but one we're about to create (with ensure).
-			frames := makeTraceFrames(gen, fpunwindExpand(stack))
+	// N.B. This might allocate, but that's OK because we're not writing to the M's buffer,
+	// but one we're about to create (with ensure).
+	frames := makeTraceFrames(w.gen, fpunwindExpand(stack))
 
-			// Returns the maximum number of bytes required to hold the encoded stack, given that
-			// it contains N frames.
-			maxBytes := 1 + (2+4*len(frames))*traceBytesPerNumber
+	// The maximum number of bytes required to hold the encoded stack, given that
+	// it contains N frames.
+	maxBytes := 1 + (2+4*len(frames))*traceBytesPerNumber
 
-			// Estimate the size of this record. This
-			// bound is pretty loose, but avoids counting
-			// lots of varint sizes.
-			//
-			// Add 1 because we might also write traceEvStacks.
-			var flushed bool
-			w, flushed = w.ensure(1 + maxBytes)
-			if flushed {
-				w.byte(byte(traceEvStacks))
-			}
+	// Estimate the size of this record. This
+	// bound is pretty loose, but avoids counting
+	// lots of varint sizes.
+	//
+	// Add 1 because we might also write traceEvStacks.
+	var flushed bool
+	w, flushed = w.ensure(1 + maxBytes)
+	if flushed {
+		w.byte(byte(traceEvStacks))
+	}
 
-			// Emit stack event.
-			w.byte(byte(traceEvStack))
-			w.varint(uint64(stk.id))
-			w.varint(uint64(len(frames)))
-			for _, frame := range frames {
-				w.varint(uint64(frame.PC))
-				w.varint(frame.funcID)
-				w.varint(frame.fileID)
-				w.varint(frame.line)
-			}
-		}
+	// Emit stack event.
+	w.byte(byte(traceEvStack))
+	w.varint(uint64(node.id))
+	w.varint(uint64(len(frames)))
+	for _, frame := range frames {
+		w.varint(uint64(frame.PC))
+		w.varint(frame.funcID)
+		w.varint(frame.fileID)
+		w.varint(frame.line)
 	}
 
-	// Still, hold the lock over reset. The callee expects it, even though it's
-	// not strictly necessary.
-	systemstack(func() {
-		lock(&t.tab.lock)
-		t.tab.reset()
-		unlock(&t.tab.lock)
-	})
-	w.flush().end()
+	// Recursively walk all child nodes.
+	for i := range node.children {
+		child := node.children[i].Load()
+		if child == nil {
+			continue
+		}
+		w = dumpStacksRec((*traceMapNode)(child), w)
+	}
+	return w
 }
 
 // makeTraceFrames returns the frames corresponding to pcs. It may
