Go map底层实现、扩容规则和特性分类源码分析

发布时间：2023-03-28 14:25:13 来源：亿速云阅读：148 作者：iii 栏目：开发技术

Go map底层实现、扩容规则和特性分类源码分析

引言

在Go语言中，map是一种非常常用的数据结构，它提供了键值对的存储和快速查找功能。本文将深入探讨Go语言中map的底层实现、扩容规则以及特性分类，并通过源码分析来揭示其内部工作机制。

Go map的基本概念

map是Go语言中的一种内置数据类型，它类似于其他语言中的字典或哈希表。map允许我们通过键来存储和检索值，键和值可以是任意类型，只要键的类型支持相等性比较。

// 示例：创建一个map
m := make(map[string]int)
m["key1"] = 1
m["key2"] = 2
fmt.Println(m["key1"]) // 输出: 1

Go map的底层实现

hmap结构体

在Go语言的运行时库中，map的底层实现是通过hmap结构体来完成的。hmap结构体定义如下：

type hmap struct {
    count     int // 当前map中的元素个数
    flags     uint8
    B         uint8  // 桶的数量的对数
    noverflow uint16 // 溢出桶的数量
    hash0     uint32 // 哈希种子
    buckets    unsafe.Pointer // 指向桶数组的指针
    oldbuckets unsafe.Pointer // 扩容时指向旧桶数组的指针
    nevacuate  uintptr        // 扩容时用于记录迁移进度
    extra *mapextra // 可选字段，用于存储额外的信息
}

bmap结构体

bmap结构体表示一个桶，每个桶可以存储8个键值对。bmap结构体的定义如下：

type bmap struct {
    tophash [bucketCnt]uint8 // 存储每个键的哈希值的高8位
    keys    [bucketCnt]keytype // 存储键
    values  [bucketCnt]valuetype // 存储值
    overflow *bmap // 指向下一个溢出桶
}

哈希函数

Go语言中的map使用哈希函数将键映射到桶中。哈希函数的选择对map的性能有很大影响。Go语言中的哈希函数会根据键的类型自动选择最合适的哈希算法。

Go map的扩容规则

扩容条件

当map中的元素数量超过当前桶的数量乘以负载因子时，map会触发扩容。负载因子是一个经验值，通常设置为6.5。

// 扩容条件
if h.count > bucketShift(h.B)*6.5 {
    hashGrow(t, h)
}

扩容过程

扩容过程分为两个阶段：

创建新桶：首先，map会创建一个新的桶数组，其大小是原来的两倍。
迁移数据：然后，map会将旧桶中的数据逐步迁移到新桶中。迁移过程是渐进式的，每次插入或删除操作时都会迁移一部分数据。

// 扩容过程
func hashGrow(t *maptype, h *hmap) {
    bigger := uint8(1)
    if !overLoadFactor(h.count+1, h.B) {
        bigger = 0
        h.flags |= sameSizeGrow
    }
    oldbuckets := h.buckets
    newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil)
    h.B += bigger
    h.flags = flags
    h.oldbuckets = oldbuckets
    h.buckets = newbuckets
    h.nevacuate = 0
    h.noverflow = 0
    if h.extra != nil && h.extra.overflow != nil {
        h.extra.oldoverflow = h.extra.overflow
        h.extra.overflow = nil
    }
    if nextOverflow != nil {
        if h.extra == nil {
            h.extra = new(mapextra)
        }
        h.extra.nextOverflow = nextOverflow
    }
}

Go map的特性分类

无序性

map中的元素是无序的，每次遍历map时，元素的顺序可能不同。这是因为map的底层实现是基于哈希表的，哈希表的特性决定了元素的存储顺序与插入顺序无关。

并发安全性

map在并发环境下是不安全的。如果多个goroutine同时对一个map进行读写操作，可能会导致数据竞争。为了在并发环境下安全地使用map，可以使用sync.Map或通过加锁来保护map。

内存管理

map的内存管理是自动的，Go语言的垃圾回收器会自动回收不再使用的map。但是，map的扩容和迁移过程可能会导致内存的短暂增加。

源码分析

map的创建

map的创建是通过make函数来完成的。make函数会调用runtime.makemap函数来创建一个新的hmap结构体，并初始化其字段。

// makemap函数
func makemap(t *maptype, hint int, h *hmap) *hmap {
    if hint < 0 || hint > int(maxSliceCap(t.bucket.size)) {
        hint = 0
    }
    if h == nil {
        h = new(hmap)
    }
    h.hash0 = fastrand()
    B := uint8(0)
    for overLoadFactor(hint, B) {
        B++
    }
    h.B = B
    if h.B != 0 {
        var nextOverflow *bmap
        h.buckets, nextOverflow = makeBucketArray(t, h.B, nil)
        if nextOverflow != nil {
            h.extra = new(mapextra)
            h.extra.nextOverflow = nextOverflow
        }
    }
    return h
}

map的插入

map的插入操作是通过runtime.mapassign函数来完成的。该函数会根据键的哈希值找到对应的桶，并将键值对插入到桶中。

// mapassign函数
func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
    if h == nil {
        panic(plainError("assignment to entry in nil map"))
    }
    if h.flags&hashWriting != 0 {
        throw("concurrent map writes")
    }
    hash := t.hasher(key, uintptr(h.hash0))
    h.flags ^= hashWriting
    again:
    bucket := hash & bucketMask(h.B)
    if h.growing() {
        growWork(t, h, bucket)
    }
    b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
    top := tophash(hash)
    for {
        for i := uintptr(0); i < bucketCnt; i++ {
            if b.tophash[i] != top {
                continue
            }
            k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
            if t.key.equal(key, k) {
                v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
                return v
            }
        }
        if b.overflow == nil {
            break
        }
        b = b.overflow
    }
    if h.growing() {
        growWork(t, h, bucket)
        goto again
    }
    if h.count >= bucketShift(h.B)*6.5 {
        hashGrow(t, h)
        goto again
    }
    if h.extra != nil && h.extra.overflow != nil {
        h.extra.overflow = nil
    }
    inserti := &b.tophash[0]
    insertk := add(unsafe.Pointer(b), dataOffset)
    val := add(insertk, bucketCnt*uintptr(t.keysize))
    for {
        for i := uintptr(0); i < bucketCnt; i++ {
            if b.tophash[i] == emptyRest {
                break
            }
            k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
            if t.key.equal(key, k) {
                v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
                return v
            }
        }
        if b.overflow == nil {
            break
        }
        b = b.overflow
    }
    if h.extra != nil && h.extra.nextOverflow != nil {
        b = h.extra.nextOverflow
        h.extra.nextOverflow = b.overflow
        b.overflow = nil
        inserti = &b.tophash[0]
        insertk = add(unsafe.Pointer(b), dataOffset)
        val = add(insertk, bucketCnt*uintptr(t.keysize))
    }
    if t.indirectkey() {
        kmem := newobject(t.key)
        *(*unsafe.Pointer)(insertk) = kmem
        insertk = kmem
    }
    if t.indirectvalue() {
        vmem := newobject(t.elem)
        *(*unsafe.Pointer)(val) = vmem
    }
    typedmemmove(t.key, insertk, key)
    *inserti = top
    h.count++
    return val
}

map的查找

map的查找操作是通过runtime.mapaccess1函数来完成的。该函数会根据键的哈希值找到对应的桶，并在桶中查找键值对。

// mapaccess1函数
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
    if h == nil || h.count == 0 {
        return unsafe.Pointer(&zeroVal[0])
    }
    if h.flags&hashWriting != 0 {
        throw("concurrent map read and map write")
    }
    hash := t.hasher(key, uintptr(h.hash0))
    m := bucketMask(h.B)
    b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
    if c := h.oldbuckets; c != nil {
        if !h.sameSizeGrow() {
            m >>= 1
        }
        oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize)))
        if !evacuated(oldb) {
            b = oldb
        }
    }
    top := tophash(hash)
    for ; b != nil; b = b.overflow {
        for i := uintptr(0); i < bucketCnt; i++ {
            if b.tophash[i] != top {
                continue
            }
            k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
            if t.key.equal(key, k) {
                v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
                return v
            }
        }
    }
    return unsafe.Pointer(&zeroVal[0])
}

map的删除

map的删除操作是通过runtime.mapdelete函数来完成的。该函数会根据键的哈希值找到对应的桶，并在桶中删除键值对。

// mapdelete函数
func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
    if h == nil || h.count == 0 {
        return
    }
    if h.flags&hashWriting != 0 {
        throw("concurrent map writes")
    }
    hash := t.hasher(key, uintptr(h.hash0))
    h.flags ^= hashWriting
    bucket := hash & bucketMask(h.B)
    if h.growing() {
        growWork(t, h, bucket)
    }
    b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
    top := tophash(hash)
    for {
        for i := uintptr(0); i < bucketCnt; i++ {
            if b.tophash[i] != top {
                continue
            }
            k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
            if t.key.equal(key, k) {
                v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
                if t.indirectkey() {
                    *(*unsafe.Pointer)(k) = nil
                } else if t.key.kind&kindNoPointers == 0 {
                    memclrHasPointers(k, t.key.size)
                }
                if t.indirectvalue() {
                    *(*unsafe.Pointer)(v) = nil
                } else if t.elem.kind&kindNoPointers == 0 {
                    memclrHasPointers(v, t.elem.size)
                }
                b.tophash[i] = emptyOne
                h.count--
                break
            }
        }
        if b.overflow == nil {
            break
        }
        b = b.overflow
    }
    if h.flags&hashWriting == 0 {
        throw("concurrent map writes")
    }
    h.flags &^= hashWriting
}

map的遍历

map的遍历操作是通过runtime.mapiterinit和runtime.mapiternext函数来完成的。mapiterinit函数会初始化一个迭代器，mapiternext函数会返回下一个键值对。

// mapiterinit函数
func mapiterinit(t *maptype, h *hmap, it *hiter) {
    if h == nil || h.count == 0 {
        return
    }
    if h.flags&hashWriting != 0 {
        throw("concurrent map iteration and map write")
    }
    it.t = t
    it.h = h
    it.buckets = h.buckets
    if t.bucket.kind&kindNoPointers != 0 {
        h.createOverflow()
        it.overflow = h.extra.overflow
        it.oldoverflow = h.extra.oldoverflow
    }
    r := uintptr(fastrand())
    if h.B > 31-bucketCntBits {
        r += uintptr(fastrand()) << 31
    }
    it.startBucket = r & bucketMask(h.B)
    it.offset = uint8(r >> h.B & (bucketCnt - 1))
    it.bucket = it.startBucket
    mapiternext(it)
}

// mapiternext函数
func mapiternext(it *hiter) {
    h := it.h
    if h.flags&hashWriting != 0 {
        throw("concurrent map iteration and map write")
    }
    t := it.t
    bucket := it.bucket
    b := it.bptr
    i := it.i
    checkBucket := it.checkBucket
    for {
        if b == nil {
            if bucket == it.startBucket && it.wrapped {
                it.key = nil
                it.value = nil
                return
            }
            if h.growing() && it.B == h.B {
                oldbucket := bucket & it.h.oldbucketmask()
                b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
                if !evacuated(b) {
                    checkBucket = bucket
                } else {
                    b = (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
                    checkBucket = noCheck
                }
            } else {
                b = (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
                checkBucket = noCheck
            }
            bucket++
            if bucket == bucketShift(it.B) {
                bucket = 0
                it.wrapped = true
            }
            i = 0
        }
        for ; i < bucketCnt; i++ {
            offi := (i + it.offset) & (bucketCnt - 1))
            if b.tophash[offi] == emptyRest {
                continue
            }
            if b.tophash[offi] == emptyOne {
                continue
            }
            k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize))
            if t.indirectkey() {
                k = *((*unsafe.Pointer)(k))
            }
            v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize))
            if checkBucket != noCheck && !h.sameSizeGrow() {
                hash := t.hasher(k, uintptr(h.hash0))
                if hash&bucketMask(it.B) != checkBucket {
                    continue
                }
            }
            if t.indirectvalue() {
                v = *((*unsafe.Pointer)(v))
            }
            it.key = k
            it.value = v
            it.bucket = bucket
            it.i = i + 1
            it.bptr = b
            return
        }
        b = b.overflow
        i = 0
    }
}

总结

通过本文的分析，我们深入

向AI问一下细节

Go map底层实现、扩容规则和特性分类源码分析

Go map底层实现、扩容规则和特性分类源码分析

目录

引言

Go map的基本概念

Go map的底层实现

hmap结构体

bmap结构体

哈希函数

Go map的扩容规则

扩容条件

扩容过程

Go map的特性分类

无序性

并发安全性

内存管理

源码分析

map的创建

map的插入

map的查找

map的删除

map的遍历

总结

猜你喜欢

Go map底层实现、扩容规则和特性分类源码分析

Go map底层实现、扩容规则和特性分类源码分析

目录

引言

Go map的基本概念

Go map的底层实现

hmap结构体

bmap结构体

哈希函数

Go map的扩容规则

扩容条件

扩容过程

Go map的特性分类

无序性

并发安全性

内存管理

源码分析

map的创建

map的插入

map的查找

map的删除

map的遍历

总结

猜你喜欢

最新资讯

相关推荐

相关标签