mark&sweep, 2分钟保证至少一次GC过程,如果分配的总内存超过上次分配的总内存一定比例(默认100%)后进行一次GC
进行mark的过程中,会停止一切G的运行,mark的过程是多任务并发的
sweep的过程是分散的
mark过程
整个程序内存块包括 .data, .bss, 每个G的stack, SpecialFinalizer
每段内存都有其相应的bitmap,用来表示每个word(8BYTE)的标志位,每word需要4bit的标志位
mark的过程就是递归遍历内存块bitmap的过程
word标志位有如下3种类型:
- 标量
- 指针
连续两个word表示一个iface或eface
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
static void
scanblock(byte b, uintptr n, byte ptrmask)
{
byte obj, obj0, p, arena_start, *arena_used, **wp, scanbuf[8], ptrbitp, bitp;
uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
Workbuf wbuf;
Iface iface;
Eface eface;
Type typ;
MSpan s;
pageID k;
bool keepworking;// Cache memory arena parameters in local vars. arena_start = runtime·mheap.arena_start; arena_used = runtime·mheap.arena_used; wbuf = getempty(nil); nobj = wbuf->nobj; wp = &wbuf->obj[nobj]; keepworking = b == nil; scanbufpos = 0; for(i = 0; i < nelem(scanbuf); i++) scanbuf[i] = nil; ptrbitp = nil; // ptrmask can have 2 possible values: // 1. nil - obtain pointer mask from GC bitmap. // 2. pointer to a compact mask (for stacks and data). if(b != nil) goto scanobj; for(;;) { if(nobj == 0) { // Out of work in workbuf. // First, see is there is any work in scanbuf. for(i = 0; i < nelem(scanbuf); i++) { b = scanbuf[scanbufpos]; scanbuf[scanbufpos++] = nil; scanbufpos %= nelem(scanbuf); if(b != nil) { n = arena_used - b; // scan until bitBoundary or BitsDead ptrmask = nil; // use GC bitmap for pointer info goto scanobj; } } if(!keepworking) { putempty(wbuf); return; } // Refill workbuf from global queue. wbuf = getfull(wbuf); if(wbuf == nil) return; nobj = wbuf->nobj; wp = &wbuf->obj[nobj]; } // If another proc wants a pointer, give it some. if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) { wbuf->nobj = nobj; wbuf = handoff(wbuf); nobj = wbuf->nobj; wp = &wbuf->obj[nobj]; } wp--; nobj--; b = *wp; n = arena_used - b; // scan until next bitBoundary or BitsDead ptrmask = nil; // use GC bitmap for pointer info scanobj: if(DebugPtrs) runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask); // Find bits of the beginning of the object. if(ptrmask == nil) { off = (uintptr*)b - (uintptr*)arena_start; ptrbitp = arena_start - off/wordsPerBitmapByte - 1; } for(i = 0; i < n; i += PtrSize) { obj = nil; // Find bits for this word. ...... if(bits <= BitsScalar) // BitsScalar || BitsDead continue; if(bits == BitsPointer) { obj = *(byte**)(b+i); obj0 = obj; goto markobj; } // With those three out of the way, must be multi-word. if(Debug && bits != BitsMultiWord) runtime·throw("unexpected garbage collection bits"); // Find the next pair of bits. if(ptrmask == nil) { bits = *ptrbitp; j = ((uintptr)b+i+PtrSize)/PtrSize & 1; ptrbitp -= j; bits >>= gcBits*j; bits = (bits>>2)&BitsMask; } else bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask; if(Debug && bits != BitsIface && bits != BitsEface) runtime·throw("unexpected garbage collection bits"); if(bits == BitsIface) { iface = (Iface*)(b+i); if(iface->tab != nil) { typ = iface->tab->type; if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers)) obj = iface->data; } } else { eface = (Eface*)(b+i); typ = eface->type; if(typ != nil) { if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers)) obj = eface->data; } } i += PtrSize; obj0 = obj; markobj: // At this point we have extracted the next potential pointer. // Check if it points into heap. if(obj == nil) continue; if(obj < arena_start || obj >= arena_used) { if((uintptr)obj < PhysPageSize && runtime·invalidptr) { s = nil; goto badobj; } continue; } // Mark the object. obj = (byte*)((uintptr)obj & ~(PtrSize-1)); off = (uintptr*)obj - (uintptr*)arena_start; bitp = arena_start - off/wordsPerBitmapByte - 1; shift = (off % wordsPerBitmapByte) * gcBits; xbits = *bitp; bits = (xbits >> shift) & bitMask; if((bits&bitBoundary) == 0) { // Not a beginning of a block, consult span table to find the block beginning. ...... obj = p; goto markobj; } if(DebugPtrs) runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj); if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) { // Running garbage collection again because // we want to find the path from a root to a bad pointer. // Found possible next step; extend or finish path. for(j=0; j<nbadblock; j++) if(badblock[j] == (uintptr)b) goto AlreadyBad; runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0)); if(ptrmask != nil) runtime·throw("bad pointer"); if(nbadblock >= nelem(badblock)) runtime·throw("badblock trace too long"); badblock[nbadblock++] = (uintptr)b; AlreadyBad:; } // Now we have bits, bitp, and shift correct for // obj pointing at the base of the object. // Only care about not marked objects. if((bits&bitMarked) != 0) continue; // If obj size is greater than 8, then each byte of GC bitmap // contains info for at most one object. In such case we use // non-atomic byte store to mark the object. This can lead // to double enqueue of the object for scanning, but scanning // is an idempotent operation, so it is OK. This cannot lead // to bitmap corruption because the single marked bit is the // only thing that can change in the byte. // For 8-byte objects we use non-atomic store, if the other // quadruple is already marked. Otherwise we resort to CAS // loop for marking. if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) || runtime·work.nproc == 1) *bitp = xbits | (bitMarked<<shift); else runtime·atomicor8(bitp, bitMarked<<shift); if(((xbits>>(shift+2))&BitsMask) == BitsDead) continue; // noscan object // Queue the obj for scanning. PREFETCH(obj); p = scanbuf[scanbufpos]; scanbuf[scanbufpos++] = obj; scanbufpos %= nelem(scanbuf); if(p == nil) continue; // If workbuf is full, obtain an empty one. if(nobj >= nelem(wbuf->obj)) { wbuf->nobj = nobj; wbuf = getempty(wbuf); nobj = wbuf->nobj; wp = &wbuf->obj[nobj]; } *wp = p; wp++; nobj++; } ............ }
}
static void
markroot(ParFor desc, uint32 i)
{
FinBlock fb;
MSpan s;
uint32 spanidx, sg;
G gp;
void *p;
uint32 status;
bool restart;USED(&desc); // Note: if you add a case here, please also update heapdump.c:dumproots. switch(i) { case RootData: scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata); break; case RootBss: scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata); break; case RootFinalizers: for(fb=runtime·allfin; fb; fb=fb->alllink) scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask); break; case RootSpans: // mark MSpan.specials ...... break; case RootFlushCaches: flushallmcaches(); break; default: // the rest is scanning goroutine stacks if(i - RootCount >= runtime·allglen) runtime·throw("markroot: bad index"); gp = runtime·allg[i - RootCount]; // remember when we've first observed the G blocked // needed only to output in traceback status = runtime·readgstatus(gp); if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0) gp->waitsince = runtime·work.tstart; // Shrink a stack if not much of it is being used. runtime·shrinkstack(gp); if(runtime·readgstatus(gp) == Gdead) gp->gcworkdone = true; else gp->gcworkdone = false; restart = runtime·stopg(gp); scanstack(gp); if(restart) runtime·restartg(gp); break; }
}
sweep过程
sweep扫描span里面每一个对象是否marked,将未marked的对象放入span的freelist中
如果span中的所有对象都进入了freelist,那么会将span的内存释放到heap中。
// sweeps one span
// returns number of pages returned to heap, or -1 if there is nothing to sweep
uintptr
runtime·sweepone(void)
{
MSpan *s;
uint32 idx, sg;
uintptr npages;
// increment locks to ensure that the goroutine is not preempted
// in the middle of sweep thus leaving the span in an inconsistent state for next GC
g->m->locks++;
sg = runtime·mheap.sweepgen;
for(;;) {
idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
if(idx >= runtime·work.nspan) {
runtime·mheap.sweepdone = true;
g->m->locks--;
return -1;
}
s = runtime·work.spans[idx];
if(s->state != MSpanInUse) {
s->sweepgen = sg;
continue;
}
if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
continue;
npages = s->npages;
if(!runtime·MSpan_Sweep(s, false))
npages = 0;
g->m->locks--;
return npages;
}
}
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
bool
runtime·MSpan_Sweep(MSpan *s, bool preserve)
{
int32 cl, n, npages, nfree;
uintptr size, off, step;
uint32 sweepgen;
byte *p, *bitp, shift, xbits, bits;
MCache *c;
byte *arena_start;
MLink head, *end, *link;
Special *special, **specialp, *y;
bool res, sweepgenset;
// It's critical that we enter this function with preemption disabled,
// GC must not start while we are in the middle of this function.
if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
runtime·throw("MSpan_Sweep: m is not locked");
sweepgen = runtime·mheap.sweepgen;
if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
s->state, s->sweepgen, sweepgen);
runtime·throw("MSpan_Sweep: bad span state");
}
arena_start = runtime·mheap.arena_start;
cl = s->sizeclass;
size = s->elemsize;
if(cl == 0) {
n = 1;
} else {
// Chunk full of small blocks.
npages = runtime·class_to_allocnpages[cl];
n = (npages << PageShift) / size;
}
res = false;
nfree = 0;
end = &head;
c = g->m->mcache;
sweepgenset = false;
// Mark any free objects in this span so we don't collect them.
for(link = s->freelist; link != nil; link = link->next) {
off = (uintptr*)link - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
*bitp |= bitMarked<<shift;
}
// Unlink & free special records for any objects we're about to free.
specialp = &s->specials;
special = *specialp;
while(special != nil) {
// A finalizer can be set for an inner byte of an object, find object beginning.
p = (byte*)(s->start << PageShift) + special->offset/size*size;
off = (uintptr*)p - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
bits = (*bitp>>shift) & bitMask;
if((bits&bitMarked) == 0) {
// Find the exact byte for which the special was setup
// (as opposed to object beginning).
p = (byte*)(s->start << PageShift) + special->offset;
// about to free object: splice out special record
y = special;
special = special->next;
*specialp = special;
if(!runtime·freespecial(y, p, size, false)) {
// stop freeing of object if it has a finalizer
*bitp |= bitMarked << shift;
}
} else {
// object is still live: keep special record
specialp = &special->next;
special = *specialp;
}
}
// Sweep through n objects of given size starting at p.
// This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations.
p = (byte*)(s->start << PageShift);
// Find bits for the beginning of the span.
off = (uintptr*)p - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = 0;
step = size/(PtrSize*wordsPerBitmapByte);
// Rewind to the previous quadruple as we move to the next
// in the beginning of the loop.
bitp += step;
if(step == 0) {
// 8-byte objects.
bitp++;
shift = gcBits;
}
for(; n > 0; n--, p += size) {
bitp -= step;
if(step == 0) {
if(shift != 0)
bitp--;
shift = gcBits - shift;
}
xbits = *bitp;
bits = (xbits>>shift) & bitMask;
// Allocated and marked object, reset bits to allocated.
if((bits&bitMarked) != 0) {
*bitp &= ~(bitMarked<<shift);
continue;
}
// At this point we know that we are looking at garbage object
// that needs to be collected.
if(runtime·debug.allocfreetrace)
runtime·tracefree(p, size);
// Reset to allocated+noscan.
*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
if(cl == 0) {
// Free large span.
if(preserve)
runtime·throw("can't preserve large span");
runtime·unmarkspan(p, s->npages<<PageShift);
s->needzero = 1;
// important to set sweepgen before returning it to heap
runtime·atomicstore(&s->sweepgen, sweepgen);
sweepgenset = true;
// NOTE(rsc,dvyukov): The original implementation of efence
// in CL 22060046 used SysFree instead of SysFault, so that
// the operating system would eventually give the memory
// back to us again, so that an efence program could run
// longer without running out of memory. Unfortunately,
// calling SysFree here without any kind of adjustment of the
// heap data structures means that when the memory does
// come back to us, we have the wrong metadata for it, either in
// the MSpan structures or in the garbage collection bitmap.
// Using SysFault here means that the program will run out of
// memory fairly quickly in efence mode, but at least it won't
// have mysterious crashes due to confused memory reuse.
// It should be possible to switch back to SysFree if we also
// implement and then call some kind of MHeap_DeleteSpan.
if(runtime·debug.efence) {
s->limit = nil; // prevent mlookup from finding this span
runtime·SysFault(p, size);
} else
runtime·MHeap_Free(&runtime·mheap, s, 1);
c->local_nlargefree++;
c->local_largefree += size;
runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
res = true;
} else {
// Free small object.
if(size > 2*sizeof(uintptr))
((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
else if(size > sizeof(uintptr))
((uintptr*)p)[1] = 0;
end->next = (MLink*)p;
end = (MLink*)p;
nfree++;
}
}
// We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation
// (return it to heap or mcentral), because allocation code assumes that a
// span is already swept if available for allocation.
if(!sweepgenset && nfree == 0) {
// The span must be in our exclusive ownership until we update sweepgen,
// check for potential races.
if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
s->state, s->sweepgen, sweepgen);
runtime·throw("MSpan_Sweep: bad span state after sweep");
}
runtime·atomicstore(&s->sweepgen, sweepgen);
}
if(nfree > 0) {
c->local_nsmallfree[cl] += nfree;
c->local_cachealloc -= nfree * size;
runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
// MCentral_FreeSpan updates sweepgen
}
return res;
}
有疑问加站长微信联系(非本文作者)