go语言的GC

richmonkey · 2015-05-18 03:00:01 · 4840 次点击 · 预计阅读时间 17 分钟 · 大约8小时之前开始浏览

这是一个创建于 2015-05-18 03:00:01 的文章，其中的信息可能已经有所发展或是发生改变。

mark&sweep, 2分钟保证至少一次GC过程，如果分配的总内存超过上次分配的总内存一定比例(默认100%)后进行一次GC
进行mark的过程中，会停止一切G的运行，mark的过程是多任务并发的
sweep的过程是分散的

mark过程

整个程序内存块包括 .data， .bss，每个G的stack， SpecialFinalizer
每段内存都有其相应的bitmap，用来表示每个word（8BYTE）的标志位，每word需要4bit的标志位
mark的过程就是递归遍历内存块bitmap的过程
word标志位有如下3种类型:

标量
指针

连续两个word表示一个iface或eface

// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
static void
scanblock(byte b, uintptr n, byte ptrmask)
{
byte obj, obj0, p, arena_start, *arena_used, **wp, scanbuf[8], ptrbitp, bitp;
uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
Workbuf wbuf;
Iface iface;
Eface eface;
Type typ;
MSpan s;
pageID k;
bool keepworking;

// Cache memory arena parameters in local vars.
arena_start = runtime·mheap.arena_start;
arena_used = runtime·mheap.arena_used;

wbuf = getempty(nil);
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
keepworking = b == nil;
scanbufpos = 0;
for(i = 0; i < nelem(scanbuf); i++)
	scanbuf[i] = nil;

ptrbitp = nil;

// ptrmask can have 2 possible values:
// 1. nil - obtain pointer mask from GC bitmap.
// 2. pointer to a compact mask (for stacks and data).
if(b != nil)
	goto scanobj;
for(;;) {
	if(nobj == 0) {
		// Out of work in workbuf.
		// First, see is there is any work in scanbuf.
		for(i = 0; i < nelem(scanbuf); i++) {
			b = scanbuf[scanbufpos];
			scanbuf[scanbufpos++] = nil;
			scanbufpos %= nelem(scanbuf);
			if(b != nil) {
				n = arena_used - b; // scan until bitBoundary or BitsDead
				ptrmask = nil; // use GC bitmap for pointer info
				goto scanobj;
			}
		}
		if(!keepworking) {
			putempty(wbuf);
			return;
		}
		// Refill workbuf from global queue.
		wbuf = getfull(wbuf);
		if(wbuf == nil)
			return;
		nobj = wbuf->nobj;
		wp = &wbuf->obj[nobj];
	}

	// If another proc wants a pointer, give it some.
	if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
		wbuf->nobj = nobj;
		wbuf = handoff(wbuf);
		nobj = wbuf->nobj;
		wp = &wbuf->obj[nobj];
	}

	wp--;
	nobj--;
	b = *wp;
	n = arena_used - b; // scan until next bitBoundary or BitsDead
	ptrmask = nil; // use GC bitmap for pointer info

scanobj:
	if(DebugPtrs)
		runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask);
	// Find bits of the beginning of the object.
	if(ptrmask == nil) {
		off = (uintptr*)b - (uintptr*)arena_start;
		ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
	}
	for(i = 0; i < n; i += PtrSize) {
		obj = nil;
		// Find bits for this word.
        ......

		if(bits <= BitsScalar) // BitsScalar || BitsDead
			continue;
		if(bits == BitsPointer) {
			obj = *(byte**)(b+i);
			obj0 = obj;
			goto markobj;
		}

		// With those three out of the way, must be multi-word.
		if(Debug && bits != BitsMultiWord)
			runtime·throw("unexpected garbage collection bits");
		// Find the next pair of bits.
		if(ptrmask == nil) {
			bits = *ptrbitp;
			j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
			ptrbitp -= j;
			bits >>= gcBits*j;
			bits = (bits>>2)&BitsMask;
		} else
			bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;

		if(Debug && bits != BitsIface && bits != BitsEface)
			runtime·throw("unexpected garbage collection bits");

		if(bits == BitsIface) {
			iface = (Iface*)(b+i);
			if(iface->tab != nil) {
				typ = iface->tab->type;
				if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
					obj = iface->data;
			}
		} else {
			eface = (Eface*)(b+i);
			typ = eface->type;
			if(typ != nil) {
				if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
					obj = eface->data;
			}
		}

		i += PtrSize;

		obj0 = obj;
	markobj:
		// At this point we have extracted the next potential pointer.
		// Check if it points into heap.
		if(obj == nil)
			continue;
		if(obj < arena_start || obj >= arena_used) {
			if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
				s = nil;
				goto badobj;
			}
			continue;
		}
		// Mark the object.
		obj = (byte*)((uintptr)obj & ~(PtrSize-1));
		off = (uintptr*)obj - (uintptr*)arena_start;
		bitp = arena_start - off/wordsPerBitmapByte - 1;
		shift = (off % wordsPerBitmapByte) * gcBits;
		xbits = *bitp;
		bits = (xbits >> shift) & bitMask;
		if((bits&bitBoundary) == 0) {
			// Not a beginning of a block, consult span table to find the block beginning.
            ......

			obj = p;
			goto markobj;
		}
		if(DebugPtrs)
			runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj);

		if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
			// Running garbage collection again because
			// we want to find the path from a root to a bad pointer.
			// Found possible next step; extend or finish path.
			for(j=0; j<nbadblock; j++)
				if(badblock[j] == (uintptr)b)
					goto AlreadyBad;
			runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0));
			if(ptrmask != nil)
				runtime·throw("bad pointer");
			if(nbadblock >= nelem(badblock))
				runtime·throw("badblock trace too long");
			badblock[nbadblock++] = (uintptr)b;
		AlreadyBad:;
		}

		// Now we have bits, bitp, and shift correct for
		// obj pointing at the base of the object.
		// Only care about not marked objects.
		if((bits&bitMarked) != 0)
			continue;
		// If obj size is greater than 8, then each byte of GC bitmap
		// contains info for at most one object. In such case we use
		// non-atomic byte store to mark the object. This can lead
		// to double enqueue of the object for scanning, but scanning
		// is an idempotent operation, so it is OK. This cannot lead
		// to bitmap corruption because the single marked bit is the
		// only thing that can change in the byte.
		// For 8-byte objects we use non-atomic store, if the other
		// quadruple is already marked. Otherwise we resort to CAS
		// loop for marking.
		if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
			runtime·work.nproc == 1)
			*bitp = xbits | (bitMarked<<shift);
		else
			runtime·atomicor8(bitp, bitMarked<<shift);

		if(((xbits>>(shift+2))&BitsMask) == BitsDead)
			continue;  // noscan object

		// Queue the obj for scanning.
		PREFETCH(obj);
		p = scanbuf[scanbufpos];
		scanbuf[scanbufpos++] = obj;
		scanbufpos %= nelem(scanbuf);
		if(p == nil)
			continue;

		// If workbuf is full, obtain an empty one.
		if(nobj >= nelem(wbuf->obj)) {
			wbuf->nobj = nobj;
			wbuf = getempty(wbuf);
			nobj = wbuf->nobj;
			wp = &wbuf->obj[nobj];
		}
		*wp = p;
		wp++;
		nobj++;
	}
    ............
}

}

static void
markroot(ParFor desc, uint32 i)
{
FinBlock fb;
MSpan s;
uint32 spanidx, sg;
G gp;
void *p;
uint32 status;
bool restart;

USED(&desc);
// Note: if you add a case here, please also update heapdump.c:dumproots.
switch(i) {
case RootData:
	scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
	break;

case RootBss:
	scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
	break;

case RootFinalizers:
	for(fb=runtime·allfin; fb; fb=fb->alllink)
		scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask);
	break;

case RootSpans:
	// mark MSpan.specials
    ......
	break;

case RootFlushCaches:
	flushallmcaches();
	break;

default:
	// the rest is scanning goroutine stacks
	if(i - RootCount >= runtime·allglen)
		runtime·throw("markroot: bad index");
	gp = runtime·allg[i - RootCount];
	// remember when we've first observed the G blocked
	// needed only to output in traceback
	status = runtime·readgstatus(gp);
	if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
		gp->waitsince = runtime·work.tstart;
	// Shrink a stack if not much of it is being used.
	runtime·shrinkstack(gp);
	if(runtime·readgstatus(gp) == Gdead) 
		gp->gcworkdone = true;
	else 
		gp->gcworkdone = false; 
	restart = runtime·stopg(gp);
	scanstack(gp);
	if(restart)
		runtime·restartg(gp);
	break;
}

}

sweep过程

sweep扫描span里面每一个对象是否marked，将未marked的对象放入span的freelist中
如果span中的所有对象都进入了freelist，那么会将span的内存释放到heap中。

// sweeps one span
// returns number of pages returned to heap, or -1 if there is nothing to sweep
uintptr
runtime·sweepone(void)
{
    MSpan *s;
    uint32 idx, sg;
    uintptr npages;
 
    // increment locks to ensure that the goroutine is not preempted
    // in the middle of sweep thus leaving the span in an inconsistent state for next GC
    g->m->locks++;
    sg = runtime·mheap.sweepgen;
    for(;;) {
    	idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
    	if(idx >= runtime·work.nspan) {
    		runtime·mheap.sweepdone = true;
    		g->m->locks--;
    		return -1;
    	}
    	s = runtime·work.spans[idx];
    	if(s->state != MSpanInUse) {
    		s->sweepgen = sg;
    		continue;
    	}
    	if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
    		continue;
    	npages = s->npages;
    	if(!runtime·MSpan_Sweep(s, false))
    		npages = 0;
    	g->m->locks--;
    	return npages;
    }
}
 
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
bool
runtime·MSpan_Sweep(MSpan *s, bool preserve)
{
    int32 cl, n, npages, nfree;
    uintptr size, off, step;
    uint32 sweepgen;
    byte *p, *bitp, shift, xbits, bits;
    MCache *c;
    byte *arena_start;
    MLink head, *end, *link;
    Special *special, **specialp, *y;
    bool res, sweepgenset;
 
    // It's critical that we enter this function with preemption disabled,
    // GC must not start while we are in the middle of this function.
    if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
    	runtime·throw("MSpan_Sweep: m is not locked");
    sweepgen = runtime·mheap.sweepgen;
    if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
    	runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
    		s->state, s->sweepgen, sweepgen);
    	runtime·throw("MSpan_Sweep: bad span state");
    }
    arena_start = runtime·mheap.arena_start;
    cl = s->sizeclass;
    size = s->elemsize;
    if(cl == 0) {
    	n = 1;
    } else {
    	// Chunk full of small blocks.
    	npages = runtime·class_to_allocnpages[cl];
    	n = (npages << PageShift) / size;
    }
    res = false;
    nfree = 0;
    end = &head;
    c = g->m->mcache;
    sweepgenset = false;
 
    // Mark any free objects in this span so we don't collect them.
    for(link = s->freelist; link != nil; link = link->next) {
    	off = (uintptr*)link - (uintptr*)arena_start;
    	bitp = arena_start - off/wordsPerBitmapByte - 1;
    	shift = (off % wordsPerBitmapByte) * gcBits;
    	*bitp |= bitMarked<<shift;
    }
 
    // Unlink & free special records for any objects we're about to free.
    specialp = &s->specials;
    special = *specialp;
    while(special != nil) {
    	// A finalizer can be set for an inner byte of an object, find object beginning.
    	p = (byte*)(s->start << PageShift) + special->offset/size*size;
    	off = (uintptr*)p - (uintptr*)arena_start;
    	bitp = arena_start - off/wordsPerBitmapByte - 1;
    	shift = (off % wordsPerBitmapByte) * gcBits;
    	bits = (*bitp>>shift) & bitMask;
    	if((bits&bitMarked) == 0) {
    		// Find the exact byte for which the special was setup
    		// (as opposed to object beginning).
    		p = (byte*)(s->start << PageShift) + special->offset;
    		// about to free object: splice out special record
    		y = special;
    		special = special->next;
    		*specialp = special;
    		if(!runtime·freespecial(y, p, size, false)) {
    			// stop freeing of object if it has a finalizer
    			*bitp |= bitMarked << shift;
    		}
    	} else {
    		// object is still live: keep special record
    		specialp = &special->next;
    		special = *specialp;
    	}
    }
 
    // Sweep through n objects of given size starting at p.
    // This thread owns the span now, so it can manipulate
    // the block bitmap without atomic operations.
    p = (byte*)(s->start << PageShift);
    // Find bits for the beginning of the span.
    off = (uintptr*)p - (uintptr*)arena_start;
    bitp = arena_start - off/wordsPerBitmapByte - 1;
    shift = 0;
    step = size/(PtrSize*wordsPerBitmapByte);
    // Rewind to the previous quadruple as we move to the next
    // in the beginning of the loop.
    bitp += step;
    if(step == 0) {
    	// 8-byte objects.
    	bitp++;
    	shift = gcBits;
    }
    for(; n > 0; n--, p += size) {
    	bitp -= step;
    	if(step == 0) {
    		if(shift != 0)
    			bitp--;
    		shift = gcBits - shift;
    	}
 
    	xbits = *bitp;
    	bits = (xbits>>shift) & bitMask;
 
    	// Allocated and marked object, reset bits to allocated.
    	if((bits&bitMarked) != 0) {
    		*bitp &= ~(bitMarked<<shift);
    		continue;
    	}
    	// At this point we know that we are looking at garbage object
    	// that needs to be collected.
    	if(runtime·debug.allocfreetrace)
    		runtime·tracefree(p, size);
    	// Reset to allocated+noscan.
    	*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
    	if(cl == 0) {
    		// Free large span.
    		if(preserve)
    			runtime·throw("can't preserve large span");
    		runtime·unmarkspan(p, s->npages<<PageShift);
    		s->needzero = 1;
    		// important to set sweepgen before returning it to heap
    		runtime·atomicstore(&s->sweepgen, sweepgen);
    		sweepgenset = true;
    		// NOTE(rsc,dvyukov): The original implementation of efence
    		// in CL 22060046 used SysFree instead of SysFault, so that
    		// the operating system would eventually give the memory
    		// back to us again, so that an efence program could run
    		// longer without running out of memory. Unfortunately,
    		// calling SysFree here without any kind of adjustment of the
    		// heap data structures means that when the memory does
    		// come back to us, we have the wrong metadata for it, either in
    		// the MSpan structures or in the garbage collection bitmap.
    		// Using SysFault here means that the program will run out of
    		// memory fairly quickly in efence mode, but at least it won't
    		// have mysterious crashes due to confused memory reuse.
    		// It should be possible to switch back to SysFree if we also
    		// implement and then call some kind of MHeap_DeleteSpan.
    		if(runtime·debug.efence) {
    			s->limit = nil;	// prevent mlookup from finding this span
    			runtime·SysFault(p, size);
    		} else
    			runtime·MHeap_Free(&runtime·mheap, s, 1);
    		c->local_nlargefree++;
    		c->local_largefree += size;
    		runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
    		res = true;
    	} else {
    		// Free small object.
    		if(size > 2*sizeof(uintptr))
    			((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
    		else if(size > sizeof(uintptr))
    			((uintptr*)p)[1] = 0;
 
    		end->next = (MLink*)p;
    		end = (MLink*)p;
    		nfree++;
    	}
    }
 
    // We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
    // because of the potential for a concurrent free/SetFinalizer.
    // But we need to set it before we make the span available for allocation
    // (return it to heap or mcentral), because allocation code assumes that a
    // span is already swept if available for allocation.
 
    if(!sweepgenset && nfree == 0) {
    	// The span must be in our exclusive ownership until we update sweepgen,
    	// check for potential races.
    	if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
    		runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
    			s->state, s->sweepgen, sweepgen);
    		runtime·throw("MSpan_Sweep: bad span state after sweep");
    	}
    	runtime·atomicstore(&s->sweepgen, sweepgen);
    }
    if(nfree > 0) {
    	c->local_nsmallfree[cl] += nfree;
    	c->local_cachealloc -= nfree * size;
    	runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
    	res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
    	// MCentral_FreeSpan updates sweepgen
    }
    return res;
}

有疑问加站长微信联系（非本文作者）

本文来自：博客园

感谢作者：richmonkey

查看原文：go语言的GC

入群交流（和以上内容无关）：加入Go大咖交流群，或添加微信：liuxiaoyan-s 备注：入群；或加QQ群：692541889

4840 次点击

加入收藏微博

收入我的专栏

上一篇：go语言调度器

下一篇：go语言的chan

runtime

goroutine

遍历

标量

0 回复

暂无回复

添加一条新回复（您需要登录后才能回复没有账号？）

请尽量让自己的回复能够对别人有帮助
支持 Markdown 格式, **粗体**、~~删除线~~、`单行代码`
支持 @ 本站用户；支持表情（输入 : 提示），见 Emoji cheat sheet
图片支持拖拽、截图粘贴等方式上传

关注我

扫码关注领全套学习资料
加入 QQ 群：
- 192706294（已满）
- 731990104（已满）
- 798786647（已满）
- 729884609（已满）
- 977810755（已满）
- 815126783（已满）
- 812540095（已满）
- 1006366459（已满）
- 692541889
加入微信群：liuxiaoyan-s，备注入群
也欢迎加入知识星球 Go粉丝们（免费）

go语言的GC

mark过程

sweep过程

用户登录

今日阅读排行

一周阅读排行

关注我

mark过程

sweep过程

go语言的GC

mark过程

sweep过程

用户登录

今日阅读排行

一周阅读排行

关注我

给该专栏投稿 写篇新文章

收入到我管理的专栏 新建专栏

mark过程

sweep过程

给该专栏投稿写篇新文章

收入到我管理的专栏新建专栏