// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "defs.h"
#include "go-type.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

enum { MProf, BProf };	// profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	union
	{
		struct	// typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;	// since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct	// typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;
	uintptr	nstk;
	uintptr	stk[1];
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;	// memory profile buckets
static Bucket *bbuckets;	// blocking profile buckets
static uintptr bucketmem;
// Return the bucket for stk[0:nstk].  If no such bucket exists and
// alloc is true, create a new one; otherwise return nil.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Hash stack (the mixing steps resemble Jenkins's one-at-a-time hash).
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->typ == typ && b->hash == h && b->nstk == (uintptr)nstk &&
		   runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime_MProf_GC(void)
{
	Bucket *b;

	runtime_lock(&proflock);
	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
	runtime_unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.
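//
// For illustration only, with the constants defined below
// (AddrHashBits=12, AddrHashShift=20, AddrDenseBits=8),
// an address such as 0x1234abcd is looked up as:
//	addr>>20          = 0x123	key of its AddrHash chain,
//	                         	multiplicatively hashed into addrhash
//	(addr>>12) & 0xff = 0x4a	index into AddrHash.dense
//	addr & 0xfffff    = 0x4abcd	matched (stored complemented) against AddrEntry.addr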

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

static AddrHash *addrhash[1<<AddrHashBits];
static AddrEntry *addrfree;
static uintptr addrmem;
// Multiplicative hash function:
// HashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};
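// As a check: (sqrt(5)-1)/2 = 0.61803398875..., and
// 0.61803398875... * (1<<32) is about 2654435769.5, whose integer part is
// HashMultiplier = 2654435769 = 0x9E3779B9.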

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
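	// Take an AddrEntry from the free list, refilling the list
	// with a batch of 64 new entries when it is empty.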
	if((e = addrfree) == nil) {
		e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}

// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
	M *m;
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	m = runtime_m();
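	// m->nomemprof is nonzero while this M is inside the profiler;
	// do not re-enter for allocations made while profiling.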
	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	nstk = runtime_callers(1, stk, 32);
	runtime_lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime_unlock(&proflock);
	m = runtime_m();
	m->nomemprof--;
}

// Called when freeing a profiled block.
void
runtime_MProf_Free(void *p, uintptr size)
{
	M *m;
	Bucket *b;

	m = runtime_m();
	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	runtime_lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime_unlock(&proflock);
	m = runtime_m();
	m->nomemprof--;
}

int64 runtime_blockprofilerate;	// in CPU ticks

void runtime_SetBlockProfileRate(intgo) asm("runtime.SetBlockProfileRate");

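// The rate argument is in nanoseconds of blocking per sampled event;
// convert it to CPU ticks, the unit runtime_blockevent compares against.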
void
runtime_SetBlockProfileRate(intgo rate)
{
	runtime_atomicstore64((uint64*)&runtime_blockprofilerate, rate * runtime_tickspersecond() / (1000*1000*1000));
}

void
runtime_blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	uintptr stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
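	// Sample roughly one event per 'rate' cycles spent blocked:
	// events of at least rate cycles are always recorded, shorter
	// ones with probability cycles/rate.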
	if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
		return;

	nstk = runtime_callers(skip, stk, 32);
	runtime_lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime_unlock(&proflock);
}

// Go interface to profile data. (Declared in extern.go)
// Assumes Go sizeof(int) == sizeof(int32)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	uint32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

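// MemProfile returns the current memory profile, one Record per bucket.
// The records are copied into p only when they all fit (n <= len(p));
// in either case n reports the number of records and ok whether they
// were copied.  Buckets whose allocations have all been freed are
// included only when include_inuse_zero is set.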
func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;

	runtime_lock(&proflock);
	n = 0;
	for(b=mbuckets; b; b=b->allnext)
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (Record*)p.__values;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime_unlock(&proflock);
}

void
runtime_MProf_Mark(void (*addroot)(byte *, uintptr))
{
	// buckhash is not allocated via mallocgc.
	addroot((byte*)&mbuckets, sizeof mbuckets);
	addroot((byte*)&bbuckets, sizeof bbuckets);
	addroot((byte*)&addrhash, sizeof addrhash);
	addroot((byte*)&addrfree, sizeof addrfree);
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

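// BlockProfile returns the current blocking profile, one BRecord per
// bucket, copying into p only when the whole profile fits.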
func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime_lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (BRecord*)p.__values;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i];
			for(; (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime_unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

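// ThreadCreateProfile copies the creation stack of every M on
// runtime_allm into p, but only when all n records fit.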
func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *m;

	first = runtime_atomicloadp(&runtime_allm);
	n = 0;
	for(m=first; m; m=m->alllink)
		n++;
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (TRecord*)p.__values;
		for(m=first; m; m=m->alllink) {
			runtime_memmove(r->stk, m->createstack, sizeof r->stk);
			r++;
		}
	}
}

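// Stack formats a stack trace of the calling goroutine into b and returns
// the number of bytes written.  If all is true, the world is stopped and
// traces of all other goroutines are appended after the current one.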
func Stack(b Slice, all bool) (n int) {
	byte *pc, *sp;
	bool enablegc;

	sp = runtime_getcallersp(&b);
	pc = runtime_getcallerpc(&b);

	if(all) {
		runtime_semacquire(&runtime_worldsema);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();
		enablegc = mstats.enablegc;
		mstats.enablegc = false;
	}

	if(b.__count == 0)
		n = 0;
	else {
		G* g = runtime_g();
		g->writebuf = (byte*)b.__values;
		g->writenbuf = b.__count;
		USED(pc);
		USED(sp);
		runtime_goroutineheader(g);
		runtime_traceback();
		runtime_goroutinetrailer(g);
		if(all)
			runtime_tracebackothers(g);
		n = b.__count - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		runtime_m()->gcing = 0;
		mstats.enablegc = enablegc;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}

static void
saveg(G *g, TRecord *r)
{
	int32 n;

	if(g == runtime_g())
		n = runtime_callers(0, r->stk, nelem(r->stk));
	else {
		// FIXME: Not implemented.
		n = 0;
	}
	if((size_t)n < nelem(r->stk))
		r->stk[n] = 0;
}

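// GoroutineProfile reports the number of live goroutines and, if they all
// fit in b, stops the world and copies a stack record for each goroutine
// that is not dead.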
func GoroutineProfile(b Slice) (n int, ok bool) {
	TRecord *r;
	G *gp;

	ok = false;
	n = runtime_gcount();
	if(n <= b.__count) {
		runtime_semacquire(&runtime_worldsema);
		runtime_m()->gcing = 1;
		runtime_stoptheworld();

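		// The world is stopped; count again, since new goroutines
		// may have been created since the check above.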
		n = runtime_gcount();
		if(n <= b.__count) {
			G* g = runtime_g();
			ok = true;
			r = (TRecord*)b.__values;
			saveg(g, r++);
			for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp, r++);
			}
		}

		runtime_m()->gcing = 0;
		runtime_semrelease(&runtime_worldsema);
		runtime_starttheworld();
	}
}