// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "defs_GOOS_GOARCH.h"
#include "type.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf }; // profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
struct Bucket
{
	Bucket *next;    // next in hash list
	Bucket *allnext; // next in list of all mbuckets/bbuckets
	int32 typ;
	// Generally, unions can break precise GC;
	// this one is fine because it does not contain pointers.
	union
	{
		struct // typ == MProf
		{
			// The following 3-stage scheme of stats accumulation
			// is required to obtain a consistent picture of mallocs and frees
			// at a given point in time.
			// The problem is that mallocs happen in real time, while frees
			// are observed only after a GC, during concurrent sweeping.
			// Counting them naively would therefore skew the profile toward mallocs.
			//
			// Mallocs are accounted in recent stats.
			// Explicit frees are accounted in recent stats.
			// GC frees are accounted in prev stats.
			// After a GC, prev stats are added to final stats and
			// recent stats are moved into prev stats.
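			//
			// For example (roughly): an allocation made just before a GC
			// is counted in recent; that GC rotates it into prev; if the
			// following sweep finds the object dead, that free is counted
			// in prev too; the next GC then publishes the alloc and the
			// free into the final stats together.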
			uintptr allocs;
			uintptr frees;
			uintptr alloc_bytes;
			uintptr free_bytes;

			uintptr prev_allocs; // from the last-but-one GC up to the last GC
			uintptr prev_frees;
			uintptr prev_alloc_bytes;
			uintptr prev_free_bytes;

			uintptr recent_allocs; // since the last GC
			uintptr recent_frees;
			uintptr recent_alloc_bytes;
			uintptr recent_free_bytes;
		};
		struct // typ == BProf
		{
			int64 count;
			int64 cycles;
		};
	};
	uintptr hash; // hash of size + stk
	uintptr size;
	uintptr nstk;
	uintptr stk[1];
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets; // memory profile buckets
static Bucket *bbuckets; // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating a new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr size, uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

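	// Allocate the hash table lazily and directly from the OS (SysAlloc), so
	// the profiler never holds pointers into garbage-collected memory (see the
	// note at the top of the file).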
	if(buckhash == nil) {
		buckhash = runtime·SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys);
		if(buckhash == nil)
			runtime·throw("runtime: cannot allocate memory");
	}

	// Hash stack.
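	// The add/shift/xor mixing below is in the style of Jenkins's
	// one-at-a-time hash, applied to whole words rather than bytes.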
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	// hash in size
	h += size;
	h += h<<10;
	h ^= h>>6;
	// finalize
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->typ == typ && b->hash == h && b->size == size && b->nstk == nstk &&
		   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	b = runtime·persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->size = size;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}

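// Fold prev stats into the final stats and rotate recent stats into prev.
// The caller must hold proflock.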
static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->prev_allocs;
		b->frees += b->prev_frees;
		b->alloc_bytes += b->prev_alloc_bytes;
		b->free_bytes += b->prev_free_bytes;

		b->prev_allocs = b->recent_allocs;
		b->prev_frees = b->recent_frees;
		b->prev_alloc_bytes = b->recent_alloc_bytes;
		b->prev_free_bytes = b->recent_free_bytes;

		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime·MProf_GC(void)
{
	runtime·lock(&proflock);
	MProf_GC();
	runtime·unlock(&proflock);
}

// Called by malloc to record a profiled block.
void
runtime·MProf_Malloc(void *p, uintptr size)
{
	uintptr stk[32];
	Bucket *b;
	int32 nstk;

	nstk = runtime·callers(1, stk, nelem(stk));
	runtime·lock(&proflock);
	b = stkbucket(MProf, size, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	runtime·unlock(&proflock);

	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
	// This reduces potential contention and the chance of deadlock.
	// Since the object must be alive during the call to MProf_Malloc,
	// it's fine to do this non-atomically.
	runtime·setprofilebucket(p, b);
}

// Called when freeing a profiled block.
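// freed reports whether this was an explicit free; explicit frees are
// accounted in recent stats, while frees found by the GC sweep (freed=false)
// are accounted in prev stats, following the scheme described in Bucket.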
void
runtime·MProf_Free(Bucket *b, uintptr size, bool freed)
{
	runtime·lock(&proflock);
	if(freed) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	} else {
		b->prev_frees++;
		b->prev_free_bytes += size;
	}
	runtime·unlock(&proflock);
}

int64 runtime·blockprofilerate; // in CPU ticks

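// The Go-level rate is in nanoseconds: the profiler aims to sample, on
// average, one blocking event per rate nanoseconds spent blocked, and
// rate <= 0 turns the profile off. The value is stored in CPU ticks so
// blockevent can compare it directly against measured cycles.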
void
runtime·SetBlockProfileRate(intgo rate)
{
	int64 r;

	if(rate <= 0)
		r = 0; // disable profiling
	else {
		// convert ns to cycles; use float64 to prevent overflow during multiplication
		r = (float64)rate*runtime·tickspersecond()/(1000*1000*1000);
		if(r == 0)
			r = 1;
	}
	runtime·atomicstore64((uint64*)&runtime·blockprofilerate, r);
}

void
runtime·blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	uintptr stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
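	// Sample: events that blocked for at least rate cycles are always
	// recorded; shorter ones are kept with probability roughly cycles/rate.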
	if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
		return;

	nstk = runtime·callers(skip, stk, nelem(stk));
	runtime·lock(&proflock);
	b = stkbucket(BProf, 0, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime·unlock(&proflock);
}

// Go interface to profile data. (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	int32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

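// MemProfile reports in n how many records the current profile holds and
// copies them into p, setting ok, only if p has room. A sketch of the usual
// two-pass call from Go, assuming the exported declarations in debug.go:
//
//	var recs []runtime.MemProfileRecord
//	n, ok := runtime.MemProfile(nil, true)
//	for !ok {
//		recs = make([]runtime.MemProfileRecord, n+50) // headroom in case the profile grows
//		n, ok = runtime.MemProfile(recs, true)
//	}
//	recs = recs[:n]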
func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime·lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
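		// Two calls flush both prev and recent into the final stats.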
		MProf_GC();
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (Record*)p.array;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime·unlock(&proflock);
}

void
runtime·iterate_memprof(void (*callback)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr))
{
	Bucket *b;

	runtime·lock(&proflock);
	for(b=mbuckets; b; b=b->allnext) {
		callback(b, b->nstk, b->stk, b->size, b->allocs, b->frees);
	}
	runtime·unlock(&proflock);
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

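// BlockProfile follows the same convention as MemProfile: n reports the
// number of records, and the data is copied (and ok set) only if p is
// large enough to hold all of them.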
func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime·lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (BRecord*)p.array;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; i<b->nstk && i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i];
			for(; i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime·unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

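// ThreadCreateProfile walks an atomic snapshot of allm without locking:
// Ms are only ever added to the list, never removed, so everything
// reachable from the snapshot remains valid.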
func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;

	first = runtime·atomicloadp(&runtime·allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (TRecord*)p.array;
		for(mp=first; mp; mp=mp->alllink) {
			runtime·memmove(r->stk, mp->createstack, sizeof r->stk);
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	uintptr pc, sp;

	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);

	if(all) {
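		// A full dump needs every goroutine's stack to be stable:
		// acquire worldsema (which also excludes the garbage collector)
		// and stop the world for the duration of the traceback.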
		runtime·semacquire(&runtime·worldsema, false);
		m->gcing = 1;
		runtime·stoptheworld();
	}

	if(b.len == 0)
		n = 0;
	else {
		g->writebuf = (byte*)b.array;
		g->writenbuf = b.len;
		runtime·goroutineheader(g);
		runtime·traceback(pc, sp, 0, g);
		if(all)
			runtime·tracebackothers(g);
		n = b.len - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

static void
saveg(uintptr pc, uintptr sp, G *gp, TRecord *r)
{
	int32 n;

	n = runtime·gentraceback(pc, sp, 0, gp, 0, r->stk, nelem(r->stk), nil, nil, false);
	if(n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	uintptr pc, sp, i;
	TRecord *r;
	G *gp;

	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);

	ok = false;
	n = runtime·gcount();
	if(n <= b.len) {
		runtime·semacquire(&runtime·worldsema, false);
		m->gcing = 1;
		runtime·stoptheworld();

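		// Re-count now that the world is stopped; goroutines may have
		// been created between the check above and the stop.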
		n = runtime·gcount();
		if(n <= b.len) {
			ok = true;
			r = (TRecord*)b.array;
			saveg(pc, sp, g, r++);
			for(i = 0; i < runtime·allglen; i++) {
				gp = runtime·allg[i];
				if(gp == g || gp->status == Gdead)
					continue;
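				// Passing ~0 for pc and sp makes gentraceback start from
				// gp's saved scheduler context, since gp is not running.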
				saveg(~(uintptr)0, ~(uintptr)0, gp, r++);
			}
		}

		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

// Tracing of alloc/free/gc.

static Lock tracelock;

static int8*
typeinfoname(int32 typeinfo)
{
	if(typeinfo == TypeInfo_SingleObject)
		return "single object";
	else if(typeinfo == TypeInfo_Array)
		return "array";
	else if(typeinfo == TypeInfo_Chan)
		return "channel";
	runtime·throw("typeinfoname: unknown type info");
	return nil;
}

void
runtime·tracealloc(void *p, uintptr size, uintptr typ)
{
	int8 *name;
	Type *type;

	runtime·lock(&tracelock);
	m->traceback = 2;
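	// typ packs a *Type pointer with a TypeInfo_* kind in its low two bits.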
	type = (Type*)(typ & ~3);
	name = typeinfoname(typ & 3);
	if(type == nil)
		runtime·printf("tracealloc(%p, %p, %s)\n", p, size, name);
	else
		runtime·printf("tracealloc(%p, %p, %s of %S)\n", p, size, name, *type->string);
	if(m->curg == nil || g == m->curg) {
		runtime·goroutineheader(g);
		runtime·traceback((uintptr)runtime·getcallerpc(&p), (uintptr)runtime·getcallersp(&p), 0, g);
	} else {
		runtime·goroutineheader(m->curg);
		runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, m->curg);
	}
	runtime·printf("\n");
	m->traceback = 0;
	runtime·unlock(&tracelock);
}

void
runtime·tracefree(void *p, uintptr size)
{
	runtime·lock(&tracelock);
	m->traceback = 2;
	runtime·printf("tracefree(%p, %p)\n", p, size);
	runtime·goroutineheader(g);
	runtime·traceback((uintptr)runtime·getcallerpc(&p), (uintptr)runtime·getcallersp(&p), 0, g);
	runtime·printf("\n");
	m->traceback = 0;
	runtime·unlock(&tracelock);
}

void
runtime·tracegc(void)
{
	runtime·lock(&tracelock);
	m->traceback = 2;
	runtime·printf("tracegc()\n");
	// running on m->g0 stack; show all non-g0 goroutines
	runtime·tracebackothers(g);
	runtime·printf("end tracegc\n");
	runtime·printf("\n");
	m->traceback = 0;
	runtime·unlock(&tracelock);
}