Golang之Map源码解析

2019-12-04 本文已影响0人踏雪寻梅be

在Golang情景中，Map主要分为两种：sync.Map和内置Map，两者主要区别是内置Map不
支持并发读写，sync.Map支持并发操作。本文章主要解读内置Map。
Golang中Map由链式哈希表实现，主要涉及创建、插入、查找、删除等基本操作，而核心
涉及到Map的冲突解决、扩容机制及迁移策略，这也是Map中最难理解的部分。
在进入Map分析之前，先来回顾一下链式哈希，如图1：

图1

该图概括的描述了链式哈希的架构，桶指针指向桶的首地址，可通过桶首地址及偏移量查
询所有的桶，通过每个桶可查找到对应的键值。

Map结构体

```
type hmap struct {
    count        int               /* Map中元素数量 */
    flags        int8              /* 相关标志位 */
    B            int8              /* （1<< B * 6.5）为最多元素的数量 */
    noverflow    int16             /* 溢出桶的数量 */
    hash0        uint32            /* 哈希种子 */
    buckets      unsafe.Pointer    /* 桶指针 */
    oldbuckets   unsafe.Pointer    /* 桶指针（只有扩容的时候才使用，指向旧的桶） */
    nevacuate    uintptr           /* 用于桶搬迁 */
    extra        *mapextra         /* 当key/value非指针时使用 */
}

type mapextra struct {
    overflow      *[]*bmap         /* 溢出桶的指针 */
    oldoverflow   *[]*bmap             
    nextOverflow  *bmap                   
}

type bmap struct {
    tophash  [bucketCnt]uint8      /* bucketCnt=8，一个桶只能存放8对k/v, 低B位用来寻找桶，高8位用来寻找元素 */
}

/* 当kev/value不为指针时，溢出桶存放到mapextra结构中，overflow存放buckets中的溢出
    桶，oldoverflow存放oldbuckets中的溢出桶，nextOverflow预分配溢出桶空间 */
type mapextra struct {
    overflow        *[]*bmap       /* 以切片形式存放buckets中的每个溢出桶 */
    oldoverflow     *[]*bmap       /*  以切片形式存放oldbuckets中的每个溢出桶*/
    nextOverflow    *bmap
}

/* map标志位 */
iterator           1        /* 迭代器buckets桶的标志位，为1表示正在使用buckets*/
oldIterator        2        /* 迭代器oldbuckets桶的标志位 ，为1表示正在使用oldbuckets*/
hashWriting        3        /* 并发写标志位，为1表示有goroutine正在写map */
sameSizeGrow       4        /* 等量扩容标志，表示申请的桶数量和原来一样 */

```

Map主要使用以上4个结构，其中hmap是最主要的结构，是整个哈希表的起点；mapextra是溢出桶相关的操作；bmap是用来对桶中的元素进行查找操作；mapextra是对非指针的键值溢出桶的存储。

Map创建

本章主要讨论使用make函数创建map的底层实现操作。

make([Type]Type)
当不指定map元素数量时，使用make_small函数创建hmap结构，但并不初始化桶。产生哈希种子-->返回。
```
func makemap_small() *hmap {            
    h := new(hmap)
    h.hash0 = fastrand()          /* 创建哈希种子 */
    return h
}
```

make([Type]Type, len)
指定元素数量，当元素数量小于8并且小于1<<B*6.5时，B = 0，此时仍然不会初始化桶指针buckets，只产生哈希种子返回，在使用的过程中初始化；其他情况设定B的值，并对桶指针buckets进行初始化。

func makemap(t *maptype, hint int, h *hmap) *hmap {
    mem, overflow := math.MulUintptr(uintptr(hint), t.bucket.size)
    if overflow || mem > maxAlloc {
        hint = 0
    }

    if h == nil {
        h = new(hmap)                /* 新建hmap结构 */
    }
    h.hash0 = fastrand()             /* 产生哈希种子 */

    B := uint8(0)
    for overLoadFactor(hint, B) {    /* 确定B的值 */
        B++
    }
    h.B = B

    if h.B != 0 {                    /* B != 0 时初始化桶指针buckets */
        var nextOverflow *bmap
        h.buckets, nextOverflow = makeBucketArray(t, h.B, nil)    /* 初始化桶指针
                                                                                                        buckets并分配空间 */
        if nextOverflow != nil {
            h.extra = new(mapextra)
            h.extra.nextOverflow = nextOverflow         /* 设置溢出桶 */
        }
    }
    return h
}

makeBucketArray()函数分析。
该函数主要用来为桶分配存储空间，分配好的架构如图所示2：

图2

代码分析：

```
func makeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets unsafe.Pointer, nextOverflow *bmap) {
    base := bucketShift(b)                      /* 桶的数量 */
    nbuckets := base

    if b >= 4 {
        nbuckets += bucketShift(b - 4)
        sz := t.bucket.size * nbuckets
        up := roundupsize(sz)
        if up != sz {
            nbuckets = up / t.bucket.size
        }
    }

    if dirtyalloc == nil {
        buckets = newarray(t.bucket, int(nbuckets))
    } else {
        buckets = dirtyalloc
        size := t.bucket.size * nbuckets
        if t.bucket.kind&kindNoPointers == 0 {
            memclrHasPointers(buckets, size)
        } else {
            memclrNoHeapPointers(buckets, size)
        }
    }

    if base != nbuckets {
        nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize)))
        last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize)))
        last.setoverflow(t, (*bmap)(buckets))
    }
    return buckets, nextOverflow
}
```

Map查找

Map查找主要是通过哈希值的低位确定桶的地址，再用高8位找到对应的key值。实现函数主要是mapaccess1()，如果找到就返回key所对应的值。

```
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
    if raceenabled && h != nil {
        callerpc := getcallerpc()
        pc := funcPC(mapaccess1)
        racereadpc(unsafe.Pointer(h), callerpc, pc)
        raceReadObjectPC(t.key, key, callerpc, pc)
    }
    if msanenabled && h != nil {
        msanread(key, t.key.size)
    }
    if h == nil || h.count == 0 {            /* 判断哈希表中是否含有数据 */
        if t.hashMightPanic() {
            t.key.alg.hash(key, 0) // see issue 23734
        }
        return unsafe.Pointer(&zeroVal[0])
    }
    if h.flags&hashWriting != 0 {         /* 是否并发写 */
        throw("concurrent map read and map write")
    }
    alg := t.key.alg
    hash := alg.hash(key, uintptr(h.hash0))      /* 计算键的哈希值 */
    m := bucketMask(h.B)                                /* 1<<h.B -1 ，低B位掩码*/
    b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))    /* 找到相应的桶，hash&m为第n个桶 */
    if c := h.oldbuckets; c != nil {
        if !h.sameSizeGrow() {
            m >>= 1
        }
        oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize)))
        if !evacuated(oldb) {
            b = oldb
        }
    }
    top := tophash(hash)            /* 计算该键tophash的值 */
bucketloop:
    for ; b != nil; b = b.overflow(t) {                            /* 依次查找桶或溢出桶的元素 */
        for i := uintptr(0); i < bucketCnt; i++ {          /* 依次遍历桶中的每个key， 
                                                                                            bucketCnt=8 */
            if b.tophash[i] != top {                          /* 如果找到top值，则比较第i个key */
                if b.tophash[i] == emptyRest {
                    break bucketloop
                }
                continue
            }
            k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))  /* 求key地址 */
            if t.indirectkey() {
                k = *((*unsafe.Pointer)(k))
            }
            if alg.equal(key, k) {   /* 比较键是否相等。如果相等，则找到key对应的值 */
                v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
                if t.indirectvalue() {
                    v = *((*unsafe.Pointer)(v))
                }
                return v
            }
        }
    }
    return unsafe.Pointer(&zeroVal[0])
}
```

Map插入

Map插入操作首先遍历元素：
1. 如果找到对应的键，则更新该键；
2. 如果未找到且剩下的空间为empty，则将新的键存到该位置；
3. 如果未找到且遍历完buckets，查看是否有溢出桶，若有则遍历溢出桶；如果没有溢出
桶，则申请一个新的溢出桶存放该元素。

```
func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
    if h == nil {
        panic(plainError("assignment to entry in nil map"))
    }
    if raceenabled {
        callerpc := getcallerpc()
        pc := funcPC(mapassign)
        racewritepc(unsafe.Pointer(h), callerpc, pc)
        raceReadObjectPC(t.key, key, callerpc, pc)
    }
    if msanenabled {
        msanread(key, t.key.size)
    }
    if h.flags&hashWriting != 0 {
        throw("concurrent map writes")
    }
    alg := t.key.alg
    hash := alg.hash(key, uintptr(h.hash0))

    h.flags ^= hashWriting

    if h.buckets == nil {
        h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
    }

again:
    bucket := hash & bucketMask(h.B)
    if h.growing() {
        growWork(t, h, bucket)
    }
    b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
    top := tophash(hash)

    var inserti *uint8
    var insertk unsafe.Pointer
    var val unsafe.Pointer
bucketloop:
    for {
        for i := uintptr(0); i < bucketCnt; i++ {                        /* 遍历桶中的8个key */
            if b.tophash[i] != top {
                if isEmpty(b.tophash[i]) && inserti == nil {
                    inserti = &b.tophash[i]
                    insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
                    val = add(unsafe.Pointer(b),  dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
                }
                if b.tophash[i] == emptyRest {
                    break bucketloop
                }
                continue
            }
            k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
            if t.indirectkey() {
                k = *((*unsafe.Pointer)(k))
            }
            if !alg.equal(key, k) {
                continue
            }
            if t.needkeyupdate() {
                typedmemmove(t.key, k, key)
            }
            val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
            goto done
        }
        ovf := b.overflow(t)
        if ovf == nil {
            break
        }
        b = ovf
    }

    if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
        hashGrow(t, h)
        goto again // Growing the table invalidates everything, so try again
    }

    if inserti == nil {
        newb := h.newoverflow(t, b)
        inserti = &newb.tophash[0]
        insertk = add(unsafe.Pointer(newb), dataOffset)
        val = add(insertk, bucketCnt*uintptr(t.keysize))
    }

    if t.indirectkey() {
        kmem := newobject(t.key)
        *(*unsafe.Pointer)(insertk) = kmem
        insertk = kmem
    }
    if t.indirectvalue() {
        vmem := newobject(t.elem)
        *(*unsafe.Pointer)(val) = vmem
    }
    typedmemmove(t.key, insertk, key)
    *inserti = top
    h.count++

done:
    if h.flags&hashWriting == 0 {
        throw("concurrent map writes")
    }
    h.flags &^= hashWriting
    if t.indirectvalue() {
        val = *((*unsafe.Pointer)(val))
    }
    return val
}
```

Map扩容

当前没有正在扩容，触发条件：

元素数量太多，超过1<<h.B*6.5；

溢出桶数量太多，超过1<<h.B。
扩容分为2倍扩容和等量扩容两种。当元素数量超过1<<h.B*6.5时为增量扩容，分配的桶数量为原来的2倍（1<<(h.B+1)）；当溢出桶数量超过1<<h.B时为等量扩容，分配的桶数量和原来相等（1<<h.B），扩容主要由hashGrow()实现，该函数主要是预分配桶空间，但并不进行数据的迁移。

 ```
 func hashGrow(t *maptype, h *hmap) {
     bigger := uint8(1)
     if !overLoadFactor(h.count+1, h.B) {    /* 判断是2倍空间扩容还是等量空间扩容 */
         bigger = 0
         h.flags |= sameSizeGrow             /* 等量空间扩容，bigger=0 */
     }
     oldbuckets := h.buckets
     newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil)   /* 分配桶空间 */

     flags := h.flags &^ (iterator | oldIterator)      /* 将buckets和oldbuckets迭代标志置0 */
     if h.flags&iterator != 0 {
         flags |= oldIterator
     }

     h.B += bigger                      /* 增量扩容为h.B+1，等量扩容为h.B */
     h.flags = flags
     h.oldbuckets = oldbuckets
     h.buckets = newbuckets
     h.nevacuate = 0                  /* 搬迁状态为0表示未进行迁移 */
     h.noverflow = 0

         /* 当key/value不是指针时，用extramap中的指针存储溢出桶，而不用bmap中的        
          * overflow。overflow表示hmap结构buckets中的溢出桶，oldoverflow表示hmap中
          * oldbuckets中的溢出桶 ，nextoverflow预分配溢出桶空间 。
          */
     if h.extra != nil && h.extra.overflow != nil {
         if h.extra.oldoverflow != nil {
             throw("oldoverflow is not nil")
         }
         h.extra.oldoverflow = h.extra.overflow
         h.extra.overflow = nil
     }
     if nextOverflow != nil {
         if h.extra == nil {
              h.extra = new(mapextra)
         }
         h.extra.nextOverflow = nextOverflow
     }
 }
 ```

Map迁移

func evacuate(t *maptype, h *hmap, oldbucket uintptr) {
     b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))  /* oldbucket为旧桶的索引 */
     newbit := h.noldbuckets()                         /* 与原来旧桶分配的容量相等 */
     if !evacuated(b) {
         var xy [2]evacDst
         x := &xy[0]                                   /* 等量扩容或2倍扩容的前一部分（X，和原来相等） */
         x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
         x.k = add(unsafe.Pointer(x.b), dataOffset)    /* key的地址 */
         x.v = add(x.k, bucketCnt*uintptr(t.keysize))  /* value得地址 */

         if !h.sameSizeGrow() {
             y := &xy[1]                               /* 若为2倍扩容，需要后一部分，即增长的空间 */
             y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))  /* 后一部分桶的索引 */
             y.k = add(unsafe.Pointer(y.b), dataOffset)
             y.v = add(y.k, bucketCnt*uintptr(t.keysize))
         }

         for ; b != nil; b = b.overflow(t) {              /* 遍历最后一个bmap及溢出桶 */
             k := add(unsafe.Pointer(b), dataOffset)      /* key的地址 */
             v := add(k, bucketCnt*uintptr(t.keysize))    /* value的地址 */
             for i := 0; i < bucketCnt; i, k, v = i+1, add(k, uintptr(t.keysize)), add(v, uintptr(t.valuesize)) {  /* 遍历桶中的元素 */
                 top := b.tophash[i]                      /* 获取tophash的值 */
                 if isEmpty(top) {                        /* 如果tophash为空，标记为已被搬迁状态 */
                     b.tophash[i] = evacuatedEmpty  
                     continue
                 }
                 if top < minTopHash {                    /* tophash的值为hash+minTopHash */
                     throw("bad map state")
                 }
                 k2 := k
                 if t.indirectkey() {
                     k2 = *((*unsafe.Pointer)(k2))
                 }
                 var useY uint8                          /* useY用来判断是落在oldbucket还是newbit */
                 if !h.sameSizeGrow() {                  /* 如果为2倍扩容，h.B增大1，桶的位置发生变化 */
                     hash := t.key.alg.hash(k2, uintptr(h.hash0))
                     if h.flags&iterator != 0 && !t.reflexivekey() && !t.key.alg.equal(k2,k2) {
                         useY = top & 1
                         top = tophash(hash)
                     } else {
                         if hash&newbit != 0 {
                             useY = 1
                         }
                     }
                 }

                 /* evacuatedY = evacuatedX + 1 */
                 if evacuatedX+1 != evacuatedY || evacuatedX^1 != evacuatedY {
                     throw("bad evacuatedN")
                 }

                 b.tophash[i] = evacuatedX + useY  /* 搬迁为X或者Y状态 */
                 dst := &xy[useY]                  /* useY=0表示搬迁到前半部分， 否则到后半部分*/

                 if dst.i == bucketCnt {           /* 当桶中元素数量达到最大8时，需要溢出桶 */
                     dst.b = h.newoverflow(t, dst.b)
                     dst.i = 0
                     dst.k = add(unsafe.Pointer(dst.b), dataOffset)
                     dst.v = add(dst.k, bucketCnt*uintptr(t.keysize))
                 }
                 dst.b.tophash[dst.i&(bucketCnt-1)] = top
                 if t.indirectkey() {
                     *(*unsafe.Pointer)(dst.k) = k2   /* key为指针时，复制指针 */
                 } else {
                     typedmemmove(t.key, dst.k, k)
                 }
                 if t.indirectvalue() {
                     *(*unsafe.Pointer)(dst.v) = *(*unsafe.Pointer)(v) /* value为指针时，复制指针 */
                 } else {
                     typedmemmove(t.elem, dst.v, v)
                 }
                 /* 进行下一个元素的搬迁 */
                 dst.i++
                 dst.k = add(dst.k, uintptr(t.keysize))
                 dst.v = add(dst.v, uintptr(t.valuesize))
             }
         }

         /* 遍历完桶后，如果没有其他goroutine使用该桶，就把该桶清空 */
         if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
             b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
             ptr := add(b, dataOffset)
             n := uintptr(t.bucketsize) - dataOffset
             memclrHasPointers(ptr, n)
         }
     }

     if oldbucket == h.nevacuate {
         advanceEvacuationMark(h, t, newbit)
     }
 }

 /* 确定桶的搬迁进度，如果搬迁完成进行后续操作 */
 func advanceEvacuationMark(h *hmap, t *maptype, newbit uintptr) {
     h.nevacuate++
     stop := h.nevacuate + 1024
     if stop > newbit {
         stop = newbit
     }
     for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) {  /*如果搬迁没有完成将搬迁进度nevacuate加1 */
         h.nevacuate++
     }
     if h.nevacuate == newbit {
         h.oldbuckets = nil                /* 搬迁完成，将oldbuckets置nil */
         if h.extra != nil {
             h.extra.oldoverflow = nil     /* 溢出桶置为nil */
         }
         h.flags &^= sameSizeGrow          /* 等量扩容位置0 */
     }
 }