Diffstat (limited to 'src/pkg')
-rw-r--r--    src/pkg/runtime/mprof.go    424
1 file changed, 384 insertions, 40 deletions
diff --git a/src/pkg/runtime/mprof.go b/src/pkg/runtime/mprof.go
index 78384f8117..1acfdb9b89 100644
--- a/src/pkg/runtime/mprof.go
+++ b/src/pkg/runtime/mprof.go
@@ -17,11 +17,258 @@ var proflock mutex
 // All memory allocations are local and do not escape outside of the profiler.
 // The profiler is forbidden from referring to garbage-collected memory.
 
+/*
+enum { MProf, BProf } // profile types
+*/
+
+/*
+// Per-call-stack profiling information.
+// Lookup by hashing call stack into a linked-list hash table.
+struct Bucket
+{
+	Bucket *next // next in hash list
+	Bucket *allnext // next in list of all mbuckets/bbuckets
+	int32 typ
+	// Generally unions can break precise GC,
+	// this one is fine because it does not contain pointers.
+	union
+	{
+		struct MProfRecord // typ == MProf
+		{
+			// The following complex 3-stage scheme of stats accumulation
+			// is required to obtain a consistent picture of mallocs and frees
+			// for some point in time.
+			// The problem is that mallocs come in real time, while frees
+			// come only after a GC during concurrent sweeping. So if we would
+			// naively count them, we would get a skew toward mallocs.
+			//
+			// Mallocs are accounted in recent stats.
+			// Explicit frees are accounted in recent stats.
+			// GC frees are accounted in prev stats.
+			// After GC prev stats are added to final stats and
+			// recent stats are moved into prev stats.
+			uintptr allocs
+			uintptr frees
+			uintptr alloc_bytes
+			uintptr free_bytes
+
+			uintptr prev_allocs // since last but one till last gc
+			uintptr prev_frees
+			uintptr prev_alloc_bytes
+			uintptr prev_free_bytes
+
+			uintptr recent_allocs // since last gc till now
+			uintptr recent_frees
+			uintptr recent_alloc_bytes
+			uintptr recent_free_bytes
+
+		} mp
+		struct BProfRecord // typ == BProf
+		{
+			int64 count
+			int64 cycles
+		} bp
+	} data
+	uintptr hash // hash of size + stk
+	uintptr size
+	uintptr nstk
+	uintptr stk[1]
+}
+*/
+
 var (
 	mbuckets *bucket // memory profile buckets
 	bbuckets *bucket // blocking profile buckets
 )
 
+/*
+enum {
+	BuckHashSize = 179999,
+}
+static Bucket **buckhash
+static uintptr bucketmem
+*/
+
+/*
+// Return the bucket for stk[0:nstk], allocating new bucket if needed.
+static Bucket*
+stkbucket(int32 typ, uintptr size, uintptr *stk, int32 nstk, bool alloc)
+{
+	int32 i
+	uintptr h
+	Bucket *b
+
+	if(buckhash == nil) {
+		buckhash = sysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys)
+		if(buckhash == nil)
+			throw("runtime: cannot allocate memory")
+	}
+
+	// Hash stack.
+	h = 0
+	for(i=0 i<nstk i++) {
+		h += stk[i]
+		h += h<<10
+		h ^= h>>6
+	}
+	// hash in size
+	h += size
+	h += h<<10
+	h ^= h>>6
+	// finalize
+	h += h<<3
+	h ^= h>>11
+
+	i = h%BuckHashSize
+	for(b = buckhash[i] b b=b.next)
+		if(b.typ == typ && b.hash == h && b.size == size && b.nstk == nstk &&
+		   mcmp((byte*)b.stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
+			return b
+
+	if(!alloc)
+		return nil
+
+	b = persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys)
+	bucketmem += sizeof *b + nstk*sizeof stk[0]
+	memmove(b.stk, stk, nstk*sizeof stk[0])
+	b.typ = typ
+	b.hash = h
+	b.size = size
+	b.nstk = nstk
+	b.next = buckhash[i]
+	buckhash[i] = b
+	if(typ == MProf) {
+		b.allnext = mbuckets
+		mbuckets = b
+	} else {
+		b.allnext = bbuckets
+		bbuckets = b
+	}
+	return b
+}
+*/
+
+func mprof_GC() {
+	for b := mbuckets; b != nil; b = b.allnext {
+		b.data.mp.allocs += b.data.mp.prev_allocs
+		b.data.mp.frees += b.data.mp.prev_frees
+		b.data.mp.alloc_bytes += b.data.mp.prev_alloc_bytes
+		b.data.mp.free_bytes += b.data.mp.prev_free_bytes
+
+		b.data.mp.prev_allocs = b.data.mp.recent_allocs
+		b.data.mp.prev_frees = b.data.mp.recent_frees
+		b.data.mp.prev_alloc_bytes = b.data.mp.recent_alloc_bytes
+		b.data.mp.prev_free_bytes = b.data.mp.recent_free_bytes
+
+		b.data.mp.recent_allocs = 0
+		b.data.mp.recent_frees = 0
+		b.data.mp.recent_alloc_bytes = 0
+		b.data.mp.recent_free_bytes = 0
+	}
+}
+
+/*
+// Record that a gc just happened: all the 'recent' statistics are now real.
+void
+MProf_GC(void)
+{
+	lock(&proflock)
+	MProf_GC()
+	unlock(&proflock)
+}
+*/
+
+/*
+// Called by malloc to record a profiled block.
+void
+MProf_Malloc(void *p, uintptr size)
+{
+	uintptr stk[32]
+	Bucket *b
+	int32 nstk
+
+	nstk = callers(1, stk, nelem(stk))
+	lock(&proflock)
+	b = stkbucket(MProf, size, stk, nstk, true)
+	b.data.mp.recent_allocs++
+	b.data.mp.recent_alloc_bytes += size
+	unlock(&proflock)
+
+	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
+	// This reduces potential contention and chances of deadlocks.
+	// Since the object must be alive during call to MProf_Malloc,
+	// it's fine to do this non-atomically.
+	setprofilebucket(p, b)
+}
+*/
+
+/*
+void
+MProf_Free(Bucket *b, uintptr size, bool freed)
+{
+	lock(&proflock)
+	if(freed) {
+		b.data.mp.recent_frees++
+		b.data.mp.recent_free_bytes += size
+	} else {
+		b.data.mp.prev_frees++
+		b.data.mp.prev_free_bytes += size
+	}
+	unlock(&proflock)
+}
+*/
+
+/*
+int64 blockprofilerate // in CPU ticks
+*/
+
+/*
+void
+SetBlockProfileRate(intgo rate)
+{
+	int64 r
+
+	if(rate <= 0)
+		r = 0 // disable profiling
+	else {
+		// convert ns to cycles, use float64 to prevent overflow during multiplication
+		r = (float64)rate*tickspersecond()/(1000*1000*1000)
+		if(r == 0)
+			r = 1
+	}
+	atomicstore64((uint64*)&blockprofilerate, r)
+}
+*/
+
+/*
+void
+blockevent(int64 cycles, int32 skip)
+{
+	int32 nstk
+	int64 rate
+	uintptr stk[32]
+	Bucket *b
+
+	if(cycles <= 0)
+		return
+	rate = atomicload64((uint64*)&blockprofilerate)
+	if(rate <= 0 || (rate > cycles && fastrand1()%rate > cycles))
+		return
+
+	if(g.m.curg == nil || g.m.curg == g)
+		nstk = callers(skip, stk, nelem(stk))
+	else
+		nstk = gcallers(g.m.curg, skip, stk, nelem(stk))
+	lock(&proflock)
+	b = stkbucket(BProf, 0, stk, nstk, true)
+	b.data.bp.count++
+	b.data.bp.cycles += cycles
+	unlock(&proflock)
+}
+*/
+
+// Go interface to profile data.
+
 // MemProfile returns n, the number of records in the current memory profile.
 // If len(p) >= n, MemProfile copies the profile into p and returns n, true.
 // If len(p) < n, MemProfile does not change p and returns n, false.
@@ -73,25 +320,6 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
 	return
 }
 
-func mprof_GC() {
-	for b := mbuckets; b != nil; b = b.allnext {
-		b.data.mp.allocs += b.data.mp.prev_allocs
-		b.data.mp.frees += b.data.mp.prev_frees
-		b.data.mp.alloc_bytes += b.data.mp.prev_alloc_bytes
-		b.data.mp.free_bytes += b.data.mp.prev_free_bytes
-
-		b.data.mp.prev_allocs = b.data.mp.recent_allocs
-		b.data.mp.prev_frees = b.data.mp.recent_frees
-		b.data.mp.prev_alloc_bytes = b.data.mp.recent_alloc_bytes
-		b.data.mp.prev_free_bytes = b.data.mp.recent_free_bytes
-
-		b.data.mp.recent_allocs = 0
-		b.data.mp.recent_frees = 0
-		b.data.mp.recent_alloc_bytes = 0
-		b.data.mp.recent_free_bytes = 0
-	}
-}
-
 // Write b's data to r.
 func record(r *MemProfileRecord, b *bucket) {
 	r.AllocBytes = int64(b.data.mp.alloc_bytes)
@@ -106,6 +334,20 @@ func record(r *MemProfileRecord, b *bucket) {
 	}
 }
 
+/*
+void
+iterate_memprof(void (*callback)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr))
+{
+	Bucket *b
+
+	lock(&proflock)
+	for(b=mbuckets b b=b.allnext) {
+		callback(b, b.nstk, b.stk, b.size, b.data.mp.allocs, b.data.mp.frees)
+	}
+	unlock(&proflock)
+}
+*/
+
 // BlockProfile returns n, the number of records in the current blocking profile.
 // If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
 // If len(p) < n, BlockProfile does not change p and returns n, false.
@@ -141,6 +383,78 @@ func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
 	return
 }
 
+// ThreadCreateProfile returns n, the number of records in the thread creation profile.
+// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
+// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
+//
+// Most clients should use the runtime/pprof package instead
+// of calling ThreadCreateProfile directly.
+func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
+	first := (*m)(atomicloadp(unsafe.Pointer(&allm)))
+	for mp := first; mp != nil; mp = mp.alllink {
+		n++
+	}
+	if n <= len(p) {
+		ok = true
+		i := 0
+		for mp := first; mp != nil; mp = mp.alllink {
+			for s := range mp.createstack {
+				p[i].Stack0[s] = uintptr(mp.createstack[s])
+			}
+			i++
+		}
+	}
+	return
+}
+
+/*
+func GoroutineProfile(b Slice) (n int, ok bool) {
+	uintptr pc, sp, i
+	TRecord *r
+	G *gp
+
+	sp = getcallersp(&b)
+	pc = (uintptr)getcallerpc(&b)
+
+	ok = false
+	n = gcount()
+	if(n <= b.len) {
+		semacquire(&worldsema, false)
+		g.m.gcing = 1
+		stoptheworld()
+
+		n = gcount()
+		if(n <= b.len) {
+			ok = true
+			r = (TRecord*)b.array
+			saveg(pc, sp, g, r++)
+			for(i = 0 i < allglen i++) {
+				gp = allg[i]
+				if(gp == g || readgstatus(gp) == Gdead)
+					continue
+				saveg(~(uintptr)0, ~(uintptr)0, gp, r++)
+			}
+		}
+
+		g.m.gcing = 0
+		semrelease(&worldsema)
+		starttheworld()
+	}
+}
+*/
+
+/*
+static void
+saveg(uintptr pc, uintptr sp, G *gp, TRecord *r)
+{
+	int32 n
+
+	n = gentraceback(pc, sp, 0, gp, 0, r.stk, nelem(r.stk), nil, nil, false)
+	if(n < nelem(r.stk))
+		r.stk[n] = 0
+}
+*/
+
 // Stack formats a stack trace of the calling goroutine into buf
 // and returns the number of bytes written to buf.
 // If all is true, Stack formats stack traces of all other goroutines
@@ -181,26 +495,56 @@ func Stack(buf []byte, all bool) int {
 	return n
 }
 
-// ThreadCreateProfile returns n, the number of records in the thread creation profile.
-// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
-// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
-//
-// Most clients should use the runtime/pprof package instead
-// of calling ThreadCreateProfile directly.
-func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
-	first := (*m)(atomicloadp(unsafe.Pointer(&allm)))
-	for mp := first; mp != nil; mp = mp.alllink {
-		n++
-	}
-	if n <= len(p) {
-		ok = true
-		i := 0
-		for mp := first; mp != nil; mp = mp.alllink {
-			for s := range mp.createstack {
-				p[i].Stack0[s] = uintptr(mp.createstack[s])
-			}
-			i++
-		}
+/*
+// Tracing of alloc/free/gc.
+
+static Mutex tracelock
+
+void
+tracealloc(void *p, uintptr size, Type *type)
+{
+	lock(&tracelock)
+	g.m.traceback = 2
+	if(type == nil)
+		printf("tracealloc(%p, %p)\n", p, size)
+	else
+		printf("tracealloc(%p, %p, %S)\n", p, size, *type.string)
+	if(g.m.curg == nil || g == g.m.curg) {
+		goroutineheader(g)
+		traceback((uintptr)getcallerpc(&p), (uintptr)getcallersp(&p), 0, g)
+	} else {
+		goroutineheader(g.m.curg)
+		traceback(~(uintptr)0, ~(uintptr)0, 0, g.m.curg)
 	}
-	return
+	printf("\n")
+	g.m.traceback = 0
+	unlock(&tracelock)
+}
+
+void
+tracefree(void *p, uintptr size)
+{
+	lock(&tracelock)
+	g.m.traceback = 2
+	printf("tracefree(%p, %p)\n", p, size)
+	goroutineheader(g)
+	traceback((uintptr)getcallerpc(&p), (uintptr)getcallersp(&p), 0, g)
+	printf("\n")
+	g.m.traceback = 0
+	unlock(&tracelock)
+}
+
+void
+tracegc(void)
+{
+	lock(&tracelock)
+	g.m.traceback = 2
+	printf("tracegc()\n")
+	// running on m.g0 stack show all non-g0 goroutines
+	tracebackothers(g)
+	printf("end tracegc\n")
+	printf("\n")
+	g.m.traceback = 0
+	unlock(&tracelock)
 }
+*/
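
A few notes on the mechanisms this change carries over, with small illustrative Go programs. None of the code below is part of the change itself, and every type and function name in it is invented for the example.

First, the 3-stage malloc/free accounting described in the Bucket comment: mallocs and explicit frees are recorded in the recent stats, frees discovered by the GC sweep go to the prev stats, and mprof_GC folds prev into the reported totals while demoting recent to prev. The sketch below (memRecord and its methods are hypothetical names; the byte counters are omitted) shows how an allocation only becomes visible in the totals after two GCs, by which time the sweep has had a chance to report its free, so readers do not see the malloc-heavy skew a naive count would give.

package main

import "fmt"

// memRecord is a stand-in for the MProfRecord stats in the diff above:
// mallocs and explicit frees land in "recent", frees found by the GC sweep
// land in "prev", and each GC folds prev into the reported totals while
// demoting recent to prev.
type memRecord struct {
	allocs, frees             uintptr // totals reported to readers
	prevAllocs, prevFrees     uintptr // since last-but-one GC till last GC
	recentAllocs, recentFrees uintptr // since last GC till now
}

func (r *memRecord) malloc()    { r.recentAllocs++ }
func (r *memRecord) sweepFree() { r.prevFrees++ } // a free discovered by GC sweep

// gc mirrors mprof_GC in the change: promote prev into the totals,
// move recent into prev, and clear recent.
func (r *memRecord) gc() {
	r.allocs += r.prevAllocs
	r.frees += r.prevFrees
	r.prevAllocs, r.prevFrees = r.recentAllocs, r.recentFrees
	r.recentAllocs, r.recentFrees = 0, 0
}

func main() {
	var r memRecord
	r.malloc()
	r.malloc()
	fmt.Println(r.allocs, r.frees) // 0 0: new mallocs are not reported yet
	r.gc()
	r.sweepFree() // one of the two objects died and was swept
	r.gc()
	fmt.Println(r.allocs, r.frees) // 2 1: a consistent malloc/free picture
}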
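Second, bucket lookup: stkbucket (still commented out above) hashes the call stack plus the allocation size with a one-at-a-time style mix and indexes a chained hash table of 179999 slots. A Go rendering of just the hash follows; the mixing steps and table size are taken from the code above, while the function name stkHash and the example PCs are made up.

package main

import "fmt"

const buckHashSize = 179999 // same hash table size as in the change

// stkHash mirrors the hash loop in stkbucket: mix in each PC, then the
// allocation size, apply the final avalanche steps, and reduce modulo the
// table size to pick a chain.
func stkHash(stk []uintptr, size uintptr) uintptr {
	var h uintptr
	for _, pc := range stk {
		h += pc
		h += h << 10
		h ^= h >> 6
	}
	// hash in size
	h += size
	h += h << 10
	h ^= h >> 6
	// finalize
	h += h << 3
	h ^= h >> 11
	return h
}

func main() {
	stk := []uintptr{0x401000, 0x402abc, 0x403f00} // a made-up call stack
	h := stkHash(stk, 64)
	fmt.Printf("hash=%#x chain=%d\n", h, h%buckHashSize)
}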
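Third, block profiling: SetBlockProfileRate stores the rate converted from nanoseconds to CPU ticks, and blockevent then keeps an event that blocked for `cycles` ticks either always (when cycles >= rate) or with probability roughly cycles/rate. The sketch below assumes a fixed 2.5 GHz tick rate in place of the runtime's measured tickspersecond(); rateToCycles and sampled are illustrative names.

package main

import (
	"fmt"
	"math/rand"
)

// ticksPerSecond stands in for the value the runtime measures with
// tickspersecond(); 2.5 GHz is an assumption for the example.
const ticksPerSecond = 2500000000

// rateToCycles mirrors SetBlockProfileRate: a rate in nanoseconds becomes a
// threshold in CPU ticks, computed in float64 to avoid overflow, floored at 1.
func rateToCycles(rateNs int64) int64 {
	if rateNs <= 0 {
		return 0 // profiling disabled
	}
	r := int64(float64(rateNs) * ticksPerSecond / (1000 * 1000 * 1000))
	if r == 0 {
		r = 1
	}
	return r
}

// sampled mirrors the decision in blockevent: events blocking for at least
// `rate` ticks are always recorded; shorter ones are kept with probability
// roughly cycles/rate.
func sampled(cycles, rate int64) bool {
	if cycles <= 0 || rate <= 0 {
		return false
	}
	return rate <= cycles || rand.Int63n(rate) <= cycles
}

func main() {
	rate := rateToCycles(1000) // profile blocking events of ~1µs and up
	kept := 0
	for i := 0; i < 1000000; i++ {
		if sampled(500, rate) { // a 500-tick blocking event
			kept++
		}
	}
	fmt.Printf("rate=%d ticks, kept ~%d of 1e6 short events\n", rate, kept)
}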
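Fourth, the exported entry points in this file (MemProfile, BlockProfile, ThreadCreateProfile, SetBlockProfileRate) all follow the same two-call contract: a too-short slice reports the required record count, and the caller retries with a larger buffer. As the doc comments say, most programs should go through runtime/pprof, but calling the runtime API directly looks like this (output handling trimmed):

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// Rate is in nanoseconds of blocking; 1 records every blocking event.
	runtime.SetBlockProfileRate(1)

	// MemProfile reports the required record count when the slice is too
	// short, so grow with a little headroom and retry until it fits.
	var records []runtime.MemProfileRecord
	for {
		n, ok := runtime.MemProfile(records, true)
		if ok {
			records = records[:n]
			break
		}
		records = make([]runtime.MemProfileRecord, n+50) // headroom for new buckets
	}
	for _, r := range records {
		fmt.Printf("in use: %d bytes in %d objects\n", r.InUseBytes(), r.InUseObjects())
	}

	// Thread creation profile, same contract.
	n, _ := runtime.ThreadCreateProfile(nil)
	stacks := make([]runtime.StackRecord, n+10)
	if n, ok := runtime.ThreadCreateProfile(stacks); ok {
		fmt.Println("threads profiled:", n)
	}
}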
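Finally, Stack, whose doc comment appears in the context above, writes into a caller-supplied buffer rather than allocating; the usual pattern is a fixed buffer truncated to the returned length.

package main

import (
	"os"
	"runtime"
)

func main() {
	// Stack fills buf with the current goroutine's trace (all=false) or
	// with every goroutine's trace (all=true) and returns the bytes used.
	buf := make([]byte, 1<<16)
	n := runtime.Stack(buf, true)
	os.Stdout.Write(buf[:n])
}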
