go语言动态内存的申请和释放设计来自于tcmalloc
主要数据结构:
MHeap:the malloc heap,管理page
MCentral:特定类型小对象共享的free list
MCache:线程本地小对象的共享free list
分配小对象
- 查找MCache相应大小的free list,如果free list非空,从free list中直接获取,
这种情况下不需要任何锁的开销 - 如果MCache的free list为空,则从MCentral获取一些free object
- 如果MCentral的free list为空,则从MHeap申请一些page,然后将page内存加入到相应MCentral的free list
- 如果MHeap缓存的page不足,则从操作系统申请一些page(至少1M)
分配大对象
大对象直接从MHeap从分配
申请动态内存
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
if size == 0 {
return unsafe.Pointer(&zerobase)
}
size0 := size
if flags&flagNoScan == 0 && typ == nil {
gothrow("malloc missing type")
}
// This function must be atomic wrt GC, but for performance reasons
// we don't acquirem/releasem on fast path. The code below does not have
// split stack checks, so it can't be preempted by GC.
// Functions like roundup/add are inlined. And onM/racemalloc are nosplit.
// If debugMalloc = true, these assumptions are checked below.
if debugMalloc {
mp := acquirem()
if mp.mallocing != 0 {
gothrow("malloc deadlock")
}
mp.mallocing = 1
if mp.curg != nil {
mp.curg.stackguard0 = ^uintptr(0xfff) | 0xbad
}
}
c := gomcache()
var s *mspan
var x unsafe.Pointer
if size <= maxSmallSize {
if flags&flagNoScan != 0 && size < maxTinySize {
// Tiny allocator.
//
// Tiny allocator combines several tiny allocation requests
// into a single memory block. The resulting memory block
// is freed when all subobjects are unreachable. The subobjects
// must be FlagNoScan (don't have pointers), this ensures that
// the amount of potentially wasted memory is bounded.
//
// Size of the memory block used for combining (maxTinySize) is tunable.
// Current setting is 16 bytes, which relates to 2x worst case memory
// wastage (when all but one subobjects are unreachable).
// 8 bytes would result in no wastage at all, but provides less
// opportunities for combining.
// 32 bytes provides more opportunities for combining,
// but can lead to 4x worst case wastage.
// The best case winning is 8x regardless of block size.
//
// Objects obtained from tiny allocator must not be freed explicitly.
// So when an object will be freed explicitly, we ensure that
// its size >= maxTinySize.
//
// SetFinalizer has a special case for objects potentially coming
// from tiny allocator, it such case it allows to set finalizers
// for an inner byte of a memory block.
//
// The main targets of tiny allocator are small strings and
// standalone escaping variables. On a json benchmark
// the allocator reduces number of allocations by ~12% and
// reduces heap size by ~20%.
tinysize := uintptr(c.tinysize)
if size <= tinysize {
tiny := unsafe.Pointer(c.tiny)
// Align tiny pointer for required (conservative) alignment.
if size&7 == 0 {
tiny = roundup(tiny, 8)
} else if size&3 == 0 {
tiny = roundup(tiny, 4)
} else if size&1 == 0 {
tiny = roundup(tiny, 2)
}
size1 := size + (uintptr(tiny) - uintptr(unsafe.Pointer(c.tiny)))
if size1 <= tinysize {
// The object fits into existing tiny block.
x = tiny
c.tiny = (*byte)(add(x, size))
c.tinysize -= uintptr(size1)
c.local_tinyallocs++
if debugMalloc {
mp := acquirem()
if mp.mallocing == 0 {
gothrow("bad malloc")
}
mp.mallocing = 0
if mp.curg != nil {
mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
}
// Note: one releasem for the acquirem just above.
// The other for the acquirem at start of malloc.
releasem(mp)
releasem(mp)
}
return x
}
}
// Allocate a new maxTinySize block.
s = c.alloc[tinySizeClass]
v := s.freelist
if v == nil {
mp := acquirem()
mp.scalararg[0] = tinySizeClass
onM(mcacheRefill_m)
releasem(mp)
s = c.alloc[tinySizeClass]
v = s.freelist
}
s.freelist = v.next
s.ref++
//TODO: prefetch v.next
x = unsafe.Pointer(v)
(*[2]uint64)(x)[0] = 0
(*[2]uint64)(x)[1] = 0
// See if we need to replace the existing tiny block with the new one
// based on amount of remaining free space.
if maxTinySize-size > tinysize {
c.tiny = (*byte)(add(x, size))
c.tinysize = uintptr(maxTinySize - size)
}
size = maxTinySize
} else {
var sizeclass int8
if size <= 1024-8 {
sizeclass = size_to_class8[(size+7)>>3]
} else {
sizeclass = size_to_class128[(size-1024+127)>>7]
}
size = uintptr(class_to_size[sizeclass])
s = c.alloc[sizeclass]
v := s.freelist
if v == nil {
mp := acquirem()
mp.scalararg[0] = uintptr(sizeclass)
onM(mcacheRefill_m)
releasem(mp)
s = c.alloc[sizeclass]
v = s.freelist
}
s.freelist = v.next
s.ref++
//TODO: prefetch
x = unsafe.Pointer(v)
if flags&flagNoZero == 0 {
v.next = nil
if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 {
memclr(unsafe.Pointer(v), size)
}
}
}
c.local_cachealloc += intptr(size)
} else {
mp := acquirem()
mp.scalararg[0] = uintptr(size)
mp.scalararg[1] = uintptr(flags)
onM(largeAlloc_m)
s = (*mspan)(mp.ptrarg[0])
mp.ptrarg[0] = nil
releasem(mp)
x = unsafe.Pointer(uintptr(s.start << pageShift))
size = uintptr(s.elemsize)
}
if flags&flagNoScan != 0 {
// All objects are pre-marked as noscan.
goto marked
}
// If allocating a defer+arg block, now that we've picked a malloc size
// large enough to hold everything, cut the "asked for" size down to
// just the defer header, so that the GC bitmap will record the arg block
// as containing nothing at all (as if it were unused space at the end of
// a malloc block caused by size rounding).
// The defer arg areas are scanned as part of scanstack.
if typ == deferType {
size0 = unsafe.Sizeof(_defer{})
}
// From here till marked label marking the object as allocated
// and storing type info in the GC bitmap.
{
arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
off := (uintptr(x) - arena_start) / ptrSize
xbits := (*uint8)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
shift := (off % wordsPerBitmapByte) * gcBits
if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
gothrow("bad bits in markallocated")
}
var ti, te uintptr
var ptrmask *uint8
if size == ptrSize {
// It's one word and it has pointers, it must be a pointer.
*xbits |= (bitsPointer << 2) << shift
goto marked
}
if typ.kind&kindGCProg != 0 {
nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
masksize := nptr
if masksize%2 != 0 {
masksize *= 2 // repeated
}
masksize = masksize * pointersPerByte / 8 // 4 bits per word
masksize++ // unroll flag in the beginning
if masksize > maxGCMask && typ.gc[1] != 0 {
// If the mask is too large, unroll the program directly
// into the GC bitmap. It's 7 times slower than copying
// from the pre-unrolled mask, but saves 1/16 of type size
// memory for the mask.
mp := acquirem()
mp.ptrarg[0] = x
mp.ptrarg[1] = unsafe.Pointer(typ)
mp.scalararg[0] = uintptr(size)
mp.scalararg[1] = uintptr(size0)
onM(unrollgcproginplace_m)
releasem(mp)
goto marked
}
ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
// Check whether the program is already unrolled.
if uintptr(atomicloadp(unsafe.Pointer(ptrmask)))&0xff == 0 {
mp := acquirem()
mp.ptrarg[0] = unsafe.Pointer(typ)
onM(unrollgcprog_m)
releasem(mp)
}
ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
} else {
ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
}
if size == 2*ptrSize {
*xbits = *ptrmask | bitBoundary
goto marked
}
te = uintptr(typ.size) / ptrSize
// If the type occupies odd number of words, its mask is repeated.
if te%2 == 0 {
te /= 2
}
// Copy pointer bitmask into the bitmap.
for i := uintptr(0); i < size0; i += 2 * ptrSize {
v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
ti++
if ti == te {
ti = 0
}
if i == 0 {
v |= bitBoundary
}
if i+ptrSize == size0 {
v &^= uint8(bitPtrMask << 4)
}
*xbits = v
xbits = (*byte)(add(unsafe.Pointer(xbits), ^uintptr(0)))
}
if size0%(2*ptrSize) == 0 && size0 < size {
// Mark the word after last object's word as bitsDead.
*xbits = bitsDead << 2
}
}
marked:
if raceenabled {
racemalloc(x, size)
}
if debugMalloc {
mp := acquirem()
if mp.mallocing == 0 {
gothrow("bad malloc")
}
mp.mallocing = 0
if mp.curg != nil {
mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
}
// Note: one releasem for the acquirem just above.
// The other for the acquirem at start of malloc.
releasem(mp)
releasem(mp)
}
if debug.allocfreetrace != 0 {
tracealloc(x, size, typ)
}
if rate := MemProfileRate; rate > 0 {
if size < uintptr(rate) && int32(size) < c.next_sample {
c.next_sample -= int32(size)
} else {
mp := acquirem()
profilealloc(mp, x, size)
releasem(mp)
}
}
if memstats.heap_alloc >= memstats.next_gc {
gogc(0)
}
return x
}
释放动态内存
go不存在类似C中的free函数,动态内存的释放是由GC进行的,每次释放不是单独一个对象,而是一个span中n个对象
// Free n objects from a span s back into the central free list c.
// Called during sweep.
// Returns true if the span was returned to heap. Sets sweepgen to
// the latest generation.
// If preserve=true, don't return the span to heap nor relink in MCentral lists;
// caller takes care of it.
bool
runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve)
{
bool wasempty;
if(s->incache)
runtime·throw("freespan into cached span");
// Add the objects back to s's free list.
wasempty = s->freelist == nil;
end->next = s->freelist;
s->freelist = start;
s->ref -= n;
if(preserve) {
// preserve is set only when called from MCentral_CacheSpan above,
// the span must be in the empty list.
if(s->next == nil)
runtime·throw("can't preserve unlinked span");
runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
return false;
}
runtime·lock(&c->lock);
// Move to nonempty if necessary.
if(wasempty) {
runtime·MSpanList_Remove(s);
runtime·MSpanList_Insert(&c->nonempty, s);
}
// delay updating sweepgen until here. This is the signal that
// the span may be used in an MCache, so it must come after the
// linked list operations above (actually, just after the
// lock of c above.)
runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
if(s->ref != 0) {
runtime·unlock(&c->lock);
return false;
}
// s is completely freed, return it to the heap.
runtime·MSpanList_Remove(s);
s->needzero = 1;
s->freelist = nil;
runtime·unlock(&c->lock);
runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
runtime·MHeap_Free(&runtime·mheap, s, 0);
return true;
}
有疑问加站长微信联系(非本文作者)