| author | Michael Anthony Knyszek <mknyszek@google.com> | 2024-03-15 22:18:06 +0000 |
|---|---|---|
| committer | Michael Knyszek <mknyszek@google.com> | 2024-04-10 18:52:49 +0000 |
| commit | 5bba5b256ce287736da60372c5cf634395d7b1a3 (patch) | |
| tree | 8bbb410d62048d6a5d472d2f37c8a17ddb14450e /src/runtime/trace2stack.go | |
| parent | 73981695a2518b6eae7f8ffe74d224691c60d433 (diff) | |
| download | go-5bba5b256ce287736da60372c5cf634395d7b1a3.tar.xz | |
runtime: rewrite traceMap to scale better
The existing implementation of traceMap is a hash map with a fixed-size
bucket table, which scales poorly with the number of elements added to
the map. Once the map holds more than a few thousand elements, it tends
to fall over.
Furthermore, cleaning up the trace map is currently non-preemptible,
without a very good reason.
This change replaces the traceMap implementation with a simple
append-only concurrent hash-trie. The data structure is incredibly
simple and does not suffer at all from the same scaling issues.
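
For readers who haven't seen the structure before, here is a minimal, stand-alone sketch of an append-only concurrent hash-trie in Go. It illustrates the general technique only and is not the runtime's traceMap: the names (hashTrie, node, put) are hypothetical, keys are reduced to their 64-bit hashes, and each trie level consumes 2 bits of the hash. Because nodes are only ever added, never moved or removed, inserters can race with a single CompareAndSwap and readers never need a lock.

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// node is one trie entry. The children slots are atomic pointers so that
// concurrent inserters can publish new nodes without a lock. (Hypothetical
// type for illustration; not the runtime's traceMapNode.)
type node struct {
	hash     uint64
	id       uint64
	children [4]atomic.Pointer[node] // indexed by 2 bits of the hash per level
}

type hashTrie struct {
	root   atomic.Pointer[node]
	nextID atomic.Uint64
}

// put returns a stable id for hash, inserting a node on first use.
// The structure is append-only, so growth never requires rehashing and
// the map cannot degrade the way a fixed-size bucket table does.
func (t *hashTrie) put(hash uint64) uint64 {
	var fresh *node
	slot := &t.root
	for depth := 0; ; depth++ {
		n := slot.Load()
		if n == nil {
			// Empty slot: race to install a new node.
			if fresh == nil {
				fresh = &node{hash: hash, id: t.nextID.Add(1)}
			}
			if slot.CompareAndSwap(nil, fresh) {
				return fresh.id
			}
			n = slot.Load() // lost the race; examine the winner instead
		}
		if n.hash == hash {
			return n.id
		}
		// Walk down, consuming 2 more bits of the hash at each level.
		slot = &n.children[(hash>>(2*depth))&3]
	}
}

func main() {
	var trie hashTrie
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for h := uint64(1); h <= 1000; h++ {
				trie.put(h * 0x9e3779b97f4a7c15) // cheap hash mixing
			}
		}()
	}
	wg.Wait()
	// Re-inserting an existing key returns the id chosen by the first
	// successful insert, regardless of which goroutine performed it.
	h := uint64(42)
	fmt.Println(trie.put(h*0x9e3779b97f4a7c15) == trie.put(h*0x9e3779b97f4a7c15)) // true
}
```

Note that a goroutine that loses the CompareAndSwap may waste an id it already reserved; in this sketch that is harmless because ids only need to be unique, not dense.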
Because the traceMap no longer has a lock, and the traceRegionAlloc it
embeds is not thread-safe, we have to push that lock down. While we're
here, this change also makes the fast path for the traceRegionAlloc
lock-free. This may not be inherently faster due to contention on the
atomic add, but it creates an easy path to sharding the main allocation
buffer to reduce contention in the future. (We might want to also
consider a fully thread-local allocator that covers both string and
stack tables. The only reason a thread-local allocator isn't feasible
right now is that each of these has its own region, but we could
certainly group them all together.)
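
As a rough illustration of what a lock-free fast path for region allocation looks like, here is a minimal sketch of a bump allocator whose only fast-path synchronization is an atomic add on the offset. This is not the runtime's traceRegionAlloc: the names (regionAlloc, alloc) and the single fixed-size backing buffer are assumptions for the example, and a real implementation would fall back to a locked slow path that chains additional blocks instead of returning nil.

```go
package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

const regionSize = 1 << 20 // 1 MiB backing buffer (arbitrary for the sketch)

// regionAlloc hands out bytes from buf by bumping off with an atomic add.
// The atomic add is the contention point mentioned above: correctness needs
// no lock, but every allocation still serializes on that one counter.
type regionAlloc struct {
	off atomic.Uint64
	buf [regionSize]byte
}

// alloc reserves n bytes (rounded up to 8-byte alignment) and returns a
// pointer to them, or nil if the region is exhausted. It is safe to call
// from multiple goroutines concurrently.
func (r *regionAlloc) alloc(n uint64) unsafe.Pointer {
	n = (n + 7) &^ 7 // keep subsequent allocations 8-byte aligned
	if n == 0 || n > regionSize {
		return nil
	}
	end := r.off.Add(n)
	if end > regionSize {
		// Fast path failed. A real allocator would take a lock here and
		// chain a fresh block; this sketch simply reports exhaustion.
		return nil
	}
	return unsafe.Pointer(&r.buf[end-n])
}

func main() {
	r := new(regionAlloc) // heap-allocate: the backing buffer is 1 MiB
	p := r.alloc(48)
	fmt.Println("got 48 bytes:", p != nil)
}
```

Sharding the buffer, as suggested above, would split off and buf into per-shard copies so that the atomic add is no longer a single global contention point.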
Change-Id: I8c06d42825c326061a1b8569e322afc4bc2a513a
Reviewed-on: https://go-review.googlesource.com/c/go/+/570035
Reviewed-by: Carlos Amedee <carlos@golang.org>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
TryBot-Bypass: Michael Knyszek <mknyszek@google.com>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src/runtime/trace2stack.go')
| -rw-r--r-- | src/runtime/trace2stack.go | 91 |
1 files changed, 42 insertions, 49 deletions
diff --git a/src/runtime/trace2stack.go b/src/runtime/trace2stack.go
index 7d698c89d3..dfccaabd62 100644
--- a/src/runtime/trace2stack.go
+++ b/src/runtime/trace2stack.go
@@ -140,62 +140,55 @@ func (t *traceStackTable) put(pcs []uintptr) uint64 {
 // can guarantee that there are no more writers to the table.
 func (t *traceStackTable) dump(gen uintptr) {
 	w := unsafeTraceWriter(gen, nil)
+	if root := (*traceMapNode)(t.tab.root.Load()); root != nil {
+		w = dumpStacksRec(root, w)
+	}
+	w.flush().end()
+	t.tab.reset()
+}
-
-	// Iterate over the table.
-	//
-	// Do not acquire t.tab.lock. There's a conceptual lock cycle between acquiring this lock
-	// here and allocation-related locks. Specifically, this lock may be acquired when an event
-	// is emitted in allocation paths. Simultaneously, we might allocate here with the lock held,
-	// creating a cycle. In practice, this cycle is never exercised. Because the table is only
-	// dumped once there are no more writers, it's not possible for the cycle to occur. However
-	// the lockrank mode is not sophisticated enough to identify this, and if it's not possible
-	// for that cycle to happen, then it's also not possible for this to race with writers to
-	// the table.
-	for i := range t.tab.tab {
-		stk := t.tab.bucket(i)
-		for ; stk != nil; stk = stk.next() {
-			stack := unsafe.Slice((*uintptr)(unsafe.Pointer(&stk.data[0])), uintptr(len(stk.data))/unsafe.Sizeof(uintptr(0)))
+
+func dumpStacksRec(node *traceMapNode, w traceWriter) traceWriter {
+	stack := unsafe.Slice((*uintptr)(unsafe.Pointer(&node.data[0])), uintptr(len(node.data))/unsafe.Sizeof(uintptr(0)))
 
-			// N.B. This might allocate, but that's OK because we're not writing to the M's buffer,
-			// but one we're about to create (with ensure).
-			frames := makeTraceFrames(gen, fpunwindExpand(stack))
+	// N.B. This might allocate, but that's OK because we're not writing to the M's buffer,
+	// but one we're about to create (with ensure).
+	frames := makeTraceFrames(w.gen, fpunwindExpand(stack))
 
-			// Returns the maximum number of bytes required to hold the encoded stack, given that
-			// it contains N frames.
-			maxBytes := 1 + (2+4*len(frames))*traceBytesPerNumber
+	// The maximum number of bytes required to hold the encoded stack, given that
+	// it contains N frames.
+	maxBytes := 1 + (2+4*len(frames))*traceBytesPerNumber
 
-			// Estimate the size of this record. This
-			// bound is pretty loose, but avoids counting
-			// lots of varint sizes.
-			//
-			// Add 1 because we might also write traceEvStacks.
-			var flushed bool
-			w, flushed = w.ensure(1 + maxBytes)
-			if flushed {
-				w.byte(byte(traceEvStacks))
-			}
+	// Estimate the size of this record. This
+	// bound is pretty loose, but avoids counting
+	// lots of varint sizes.
+	//
+	// Add 1 because we might also write traceEvStacks.
+	var flushed bool
+	w, flushed = w.ensure(1 + maxBytes)
+	if flushed {
+		w.byte(byte(traceEvStacks))
+	}
 
-			// Emit stack event.
-			w.byte(byte(traceEvStack))
-			w.varint(uint64(stk.id))
-			w.varint(uint64(len(frames)))
-			for _, frame := range frames {
-				w.varint(uint64(frame.PC))
-				w.varint(frame.funcID)
-				w.varint(frame.fileID)
-				w.varint(frame.line)
-			}
-		}
+	// Emit stack event.
+	w.byte(byte(traceEvStack))
+	w.varint(uint64(node.id))
+	w.varint(uint64(len(frames)))
+	for _, frame := range frames {
+		w.varint(uint64(frame.PC))
+		w.varint(frame.funcID)
+		w.varint(frame.fileID)
+		w.varint(frame.line)
 	}
 
-	// Still, hold the lock over reset. The callee expects it, even though it's
-	// not strictly necessary.
-	systemstack(func() {
-		lock(&t.tab.lock)
-		t.tab.reset()
-		unlock(&t.tab.lock)
-	})
-	w.flush().end()
+	// Recursively walk all child nodes.
+	for i := range node.children {
+		child := node.children[i].Load()
+		if child == nil {
+			continue
+		}
+		w = dumpStacksRec((*traceMapNode)(child), w)
+	}
+	return w
 }
 
 // makeTraceFrames returns the frames corresponding to pcs. It may
