golang 源码剖析(3): 内存分配

  1. 先从操作系统申请一块大内存,以减少系统调用
  2. 将申请到的内存按照特定大小预先切成小块,构成一个链表
  3. 为对象分配内存时,只需从链表中取出一个大小合适的块使用就好
  4. 回收对象内存时,只需将对象放回原链表,以便复用
  5. 闲置过多时,会将部分内存归还系统,降低整体开销



  • span: 由多个地址连续的页(page)组成的大块内存
  • object: 将span按特定大小分成多个小块, 每个小块可存储一个对象


const (
    _MaxSmallSize   = 32768 //最大小对象size, 32*1024byte
    smallSizeDiv    = 8
    smallSizeMax    = 1024
    largeSizeDiv    = 128
    _NumSizeClasses = 67 //size类别数量
    _PageShift      = 13
    _PageSize = 1 << _PageShift // 8kb

freeindex则是用于发现下一个free object用的
allocBits 是gc bitmap
sweepgen 用来标志gc清理状态

type mspan struct {
    next *mspan     // next span in list, or nil if none
    prev *mspan     // previous span in list, or nil if none

    startAddr uintptr // address of first byte of span aka s.base()
    npages    uintptr // number of pages in span
    manualFreeList gclinkptr // list of free objects in mSpanManual spans

    // freeindex is the slot index between 0 and nelems at which to begin scanning
    // for the next free object in this span.
    // Each allocation scans allocBits starting at freeindex until it encounters a 0
    // indicating a free object. freeindex is then adjusted so that subsequent scans begin
    // just past the newly discovered free object.
    // If freeindex == nelem, this span has no free objects.
    // allocBits is a bitmap of objects in this span.
    // If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
    // then object n is free;
    // otherwise, object n is allocated. Bits starting at nelem are
    // undefined and should never be referenced.
    // Object n starts at address n*elemsize + (start << pageShift).
    freeindex uintptr
    allocBits  *gcBits
    spanclass   spanClass  // size class and noscan (uint8)
    elemsize    uintptr    // computed from sizeclass or from npages
    // sweep generation:
    // if sweepgen == h->sweepgen - 2, the span needs sweeping
    // if sweepgen == h->sweepgen - 1, the span is currently being swept
    // if sweepgen == h->sweepgen, the span is swept and ready to use
    // if sweepgen == h->sweepgen + 1, the span was cached before sweep began and is still cached, and needs sweeping
    // if sweepgen == h->sweepgen + 3, the span was swept and then cached and is still cached
    // h->sweepgen is incremented by 2 after every GC

    sweepgen    uint32

size根据align递增,同时对齐, 这里计算了最小浪费和最大浪费

    align := 8
    for size := align; size <= maxSmallSize; size += align {
        if powerOfTwo(size) { // bump alignment once in a while
            if size >= 2048 {
                align = 256
            } else if size >= 128 {
                align = size / 8
            } else if size >= 16 {
                align = 16 // required for x86 SSE instructions, if we want to use them

    spanSize := c.npages * pageSize
    objects := spanSize / c.size
    tailWaste := spanSize - c.size*(spanSize/c.size) //这里的浪费是对齐引起的浪费
    maxWaste := float64((c.size-prevSize-1)*objects+tailWaste) / float64(spanSize) // 这里是对齐浪费再加上装入前一个size-class的对象引起的浪费



type mheap struct {
    // lock must only be acquired on the system stack, otherwise a g
    // could self-deadlock if its stack grows with the lock held.
    lock      mutex
    free      mTreap // free spans
    // central free lists for small size classes.
    // the padding makes sure that the mcentrals are
    // spaced CacheLinePadSize bytes apart, so that each mcentral.lock
    // gets its own cache line.
    // central is indexed by spanClass.
    central [numSpanClasses]struct {
        mcentral mcentral
        pad      [cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize]byte


type mcentral struct {
    lock      mutex
    spanclass spanClass
    nonempty  mSpanList // list of spans with a free object, ie a nonempty free list
    empty     mSpanList // list of spans with no free objects (or cached in an mcache)

    // nmalloc is the cumulative count of objects allocated from
    // this mcentral, assuming all spans in mcaches are
    // fully-allocated. Written atomically, read under STW.
    nmalloc uint64


type mcache struct {
    // tiny is a heap pointer. Since mcache is in non-GC'd memory,
    // we handle it by clearing it in releaseAll during mark
    // termination.
    tiny             uintptr
    tinyoffset       uintptr
    local_tinyallocs uintptr // number of tiny allocs not counted in other stats
    alloc [numSpanClasses]*mspan // spans to allocate from, indexed by spanClass


numSpanClasses = _NumSizeClasses << 1

  1. 计算待分配对象的规格(size-class)
  2. 先找到对应sizeclass的span,根据span.freeindex尝试nextFreeFast(将下一个空闲对象地址放在这里,可快速获取),从span.allocCache中查找是否有空闲的object,如果有则直接返回
  3. 从cache.alloc数组中找到相同size-class的span,如果存在则返回
  4. 如果不存在,会在之后启动gc,同时通过mheap_.central[spc].mcentral.cacheSpan()轮询mcentral.nonempty去获取一个新的span(这里会用到锁,因为mcentral是多线程共享的)
  5. 如果在mcentral.nonempty中找不到,则会从mcentral.empty中循环做垃圾回收(在这里将s.freeindex重置为0),直到找到一个可用的span
  6. 如果还是找不到会调用mcentral.grow()通过mheap.alloc获取
  7. mheap会先在mheap.free中根据需要的page数查找合适的树节点,如果找到则初始化成span,如果找不到则调用h.sysAlloc(askSize)去申请内存
  8. 这里涉及到arena的大小,也就是预先保留的虚拟地址空间,如果够用,则直接指定内存startAddress获取内存;
    如果不够,则需要调用sysReserve(即 p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0))去预留一些虚拟内存,并添加到mheap_.arenaHints中去,最后才调用sysMap,也就是底层的 mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) 申请可用的内存.



  • cache是工作线程私有的,因而是实现高性能无锁分配的核心
  • central是为了在多个线程之间回收再分配,将别的线程回收的内存继续使用,提高多个cache之间object的利用率,避免内存浪费



    // Initialize the heap.
    _g_ := getg()
    _g_.m.mcache = allocmcache()
package main
import (
var ps *process.Process
func mem(n int){
  if ps == nil{
    p,err := process.NewProcess(int32(os.Getpid()))
    if err!=nil{
    ps = p
  mem,_:= ps.MemoryInfoEx()
  fmt.Printf("%d.VMS:%d MB, RSS:%d MB\n", n,mem.VMS>>20, mem.RSS>>20)

func main(){
  data := new([10][1024*1024]byte)

  for i := range data{
   for x,n := 0,len(data[i]);x<n;x++{
     data[i][x] = 1
(base) ➜  readsrc ./readsrc
1.VMS:463 MB, RSS:3 MB
2.VMS:609 MB, RSS:4 MB
3.VMS:609 MB, RSS:7 MB
3.VMS:609 MB, RSS:7 MB




package main

func test() *int{
  x := new(int)
  *x = 0xAABB
  return x
func main(){


go:4             0x4525e1                e87a87fbff              CALL runtime.newobject(SB)


(base) ➜  readsrc go build -gcflags "-m"
# readsrc
./main.go:3:6: can inline test
./main.go:8:6: can inline main
./main.go:9:16: inlining call to test
./main.go:4:11: new(int) escapes to heap
./main.go:9:16: main new(int) does not escape



// implementation of new builtin
// compiler (both frontend and SSA backend) knows the signature
// of this function
func newobject(typ *_type) unsafe.Pointer {
    return mallocgc(typ.size, typ, true)
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
noscan := typ == nil || typ.ptrdata == 0 //判断该类型是否不包含指针(不包含指针则标记为noscan)
    if size <= maxSmallSize { //如果是小对象
func bgsweep(c chan int) {
// Find a span to sweep.
    var s *mspan
    sg := mheap_.sweepgen
    for {
        s = mheap_.sweepSpans[1-sg/2%2].pop()
        if s == nil {
            atomic.Store(&mheap_.sweepdone, 1)
        if s.state != mSpanInUse {
            // This can happen if direct sweeping already
            // swept this span, but in that case the sweep
            // generation should always be up-to-date.
            if !(s.sweepgen == sg || s.sweepgen == sg+3) {
                print("runtime: bad span s.state=", s.state, " s.sweepgen=", s.sweepgen, " sweepgen=", sg, "\n")
                throw("non in-use span in unswept list")
        if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {

    // Sweep the span we found.
    npages := ^uintptr(0)
    if s != nil {
        npages = s.npages
        if s.sweep(false) {
            // Whole span was freed. Count it toward the
            // page reclaimer credit since these pages can
            // now be used for span allocation.
            atomic.Xadduintptr(&mheap_.reclaimCredit, npages)
        } else {
            // Span is still in-use, so this returned no
            // pages to the heap and the span needs to
            // move to the swept in-use list.
            npages = 0
    // roles on each GC cycle. Since the sweepgen increases by 2
    // on each cycle, this means the swept spans are in
    // sweepSpans[sweepgen/2%2] and the unswept spans are in
    // sweepSpans[1-sweepgen/2%2].

这里mheap_.sweepSpans存放的都是使用中的span, 分为已清理和未清理两种,从未清理的span中pop出一个,
调用res = mheap_.central[spc].mcentral.freeSpan(s, preserve, wasempty), mheap_.freeSpan(s, false)来将span归还给heap


在运行时入口函数runtime.main中会调用gcenable(),接着就会起一个协程go bgscavenge(c)
这里会计算我们希望保留的堆大小(want),如果实际保留的堆大小(retained)比它大,则调用mheap_.scavengeLocked(uintptr(retained - want))

            // Calculate how big we want the retained heap to be
            // at this point in time.
            // The formula is for that of a line, y = b - mx
            // We want y (want),
            //   m = scavengeBytesPerNS (> 0)
            //   x = time between scavengeTimeBasis and now
            //   b = scavengeRetainedBasis
            rate := mheap_.scavengeBytesPerNS
            tdist := nanotime() - mheap_.scavengeTimeBasis
            rdist := uint64(rate * float64(tdist))
            want := mheap_.scavengeRetainedBasis - rdist
            // If we're above the line, scavenge to get below the
            // line.
            if retained > want {
                released = mheap_.scavengeLocked(uintptr(retained - want))


// Returns the amount of memory scavenged in bytes. h must be locked.
func (h *mheap) scavengeLocked(nbytes uintptr) uintptr {
    released := uintptr(0)
    // Iterate over spans with huge pages first, then spans without.
    const mask = treapIterScav | treapIterHuge
    for _, match := range []treapIterType{treapIterHuge, 0} {
        // Iterate over the treap backwards (from highest address to lowest address)
        // scavenging spans until we've reached our quota of nbytes.
        for t := h.free.end(mask, match); released < nbytes && t.valid(); {
            s := t.span()
            start, end := s.physPageBounds()
            if start >= end {
                // This span doesn't cover at least one physical page, so skip it.
                t = t.prev()
            n := t.prev()
            if span := h.scavengeSplit(t, nbytes-released); span != nil {
                s = span
            } else {
            released += s.scavenge()
            // Now that s is scavenged, we must eagerly coalesce it
            // with its neighbors to prevent having two spans with
            // the same scavenged state adjacent to each other.
            t = n
    return released

span调用s.scavenge(). 在这个函数里面调用sysUnused函数,再调用系统的madvise(unsafe.Pointer(head), physHugePageSize, _MADV_NOHUGEPAGE) 至此,内存释放完毕.
madvise告知操作系统这段内存暂不使用,建议内核回收其物理内存。 如果物理内存资源充足,该建议可能会被忽略。




