redis dict

2019-08-15 本文已影响0人多多的大白

字典其实跟我们java 中的hashmap 是一样的道理，一种用于保存键值对（key-value pair）的抽象数据结构。其实就是一种hash结构。

1、字典数据结构

Redis 的字典使用哈希表作为底层实现，一个哈希表里面可以有多个哈希表节点，而每个哈希表节点就保存了字典中的一个键值对。

字典结构

1. Hash表（ dict.h/dictht 结构定义）

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table;//散列数组
    unsigned long size;// 散列数组长度
    unsigned long sizemask;// 散列数组长度掩码 = 散列数组长度-1 
    unsigned long used;//已经使用散列数组长度
} dictht;

1、table 属性是一个数组，数组中的每个元素都是一个指向 dict.h/dictEntry 结构的指针，每个 dictEntry 结构保存着一个键值对。
2、size 属性记录了哈希表的大小，也即是 table 数组的大小，而 used 属性则记录了哈希表目前已有节点（键值对）的数量。
3、sizemask 属性的值总是等于 size - 1 ，这个属性和哈希值一起决定一个键应该被放到 table 数组的哪个索引上面。

2.Hash表节点

单向链表

/* 保存key-value对的结构体 */
typedef struct dictEntry {
    void *key; //键
    union {//值
        void *val;//空类型指针
        uint64_t u64; //无符合整型
        int64_t s64;//有符合整型
        double d;//双精度浮点数
    } v;
    struct dictEntry *next;//下一个对象指针形成链表
} dictEntry;

3.字典

typedef struct dict {
    dictType *type;// 字典类型
    void *privdata;// 私有数据
    dictht ht[2];// 哈希表
    long rehashidx; /*  数据rehash的当前索引位置 rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;

1、type ：一个dictType类型的结构体指针，每个不同dictType保存着一系列对该对象操作的函数。（简单理解就是某个对象里面自己的方法，比如说Java中每个对象有hash和equel方法）
2、privdata：保存了需要传给那些类型特定函数的可选参数。（其实我也不知道干嘛的）
3、ht[2] :维护了俩个hash表，主要就是在rehash过程中数据从ht[0]-->ht[1]中
正常情况下数据都只会存储在ht[0]，rehash 过程中数据可能ht[0]有，ht[1]也有。
4、rehashidx：rehas索引，在没有rehash过程中，该数据的值永远为-1，只有在rehash过程中变成大于-1的值。
5、iterators：迭代器。

/* 字典操作的方法 */
typedef struct dictType {
    uint64_t (*hashFunction)(const void *key);// 哈希函数指针，使用key来计算哈希值
    void *(*keyDup)(void *privdata, const void *key);// 复制key的函数指
    void *(*valDup)(void *privdata, const void *obj);// 复制value的函数指针
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);// 比较两个key的函数指针
    void (*keyDestructor)(void *privdata, void *key);// 销毁key的函数指针
    void (*valDestructor)(void *privdata, void *obj);// 销毁value的函数指针
} dictType;

2、hash算法以及冲突

hash算法

redis 在该版本中采用俩种Hash算法
1、dictGenHashFunction: Austin Appleby的MurmurHash3算法
2、dictGenCaseHashFunction: 一个对大小写不敏感的哈希函数（基于djb哈希算法）
在字典结构中貌似没看到使用dictGenCaseHashFunction。

/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *entry = dictAddRaw(d,key,NULL);
    if (!entry) return DICT_ERR;
    dictSetVal(d, entry, val);
    return DICT_OK;
}
/* Low level add or find:
 * This function adds the entry but instead of setting a value returns the
 * dictEntry structure to the user, that will make sure to fill the value
 * field as he wishes.
 *
 * This function is also directly exposed to the user API to be called
 * mainly in order to store non-pointers inside the hash value, example:
 *
 * entry = dictAddRaw(dict,mykey,NULL);
 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
 *
 * Return values:
 *
 * If key already exists NULL is returned, and "*existing" is populated
 * with the existing entry if existing is not NULL.
 *
 * If key was added, the hash entry is returned to be manipulated by the caller.
 */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long index;
    dictEntry *entry;
    dictht *ht;
    if (dictIsRehashing(d)) _dictRehashStep(d);
    /* Get the index of the new element, or -1 if
     * the element already exists. */
    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;
    /* Allocate the memory and store the new entry.
     * Insert the element in top, with the assumption that in a database
     * system it is more likely that recently added entries are accessed
     * more frequently. */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;
    /* Set the hash entry fields. */
    dictSetKey(d, entry, key);
    return entry;
/* Returns the index of a free slot that can be populated with
 * a hash entry for the given 'key'.
 * If the key already exists, -1 is returned
 * and the optional output parameter may be filled.
 *
 * Note that if we are in the process of rehashing the hash table, the
 * index is always returned in the context of the second (new) hash table. */
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
   {
    unsigned long idx, table;
    dictEntry *he;
    if (existing) *existing = NULL;
    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;
    for (table = 0; table <= 1; table++) {
        idx = hash & d->ht[table].sizemask;
        /* Search if this slot does not already contain the given key */
        he = d->ht[table].table[idx];
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                if (existing) *existing = he;
                return -1;
            }
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return idx;
}

1、通过dictAddRaw 里面调用的_dictKeyIndex函数中有一句话

if (!dictIsRehashing(d)) break;

可以看出如果不是在rehash 过程中数据肯定都会在ht[0] 上进行计算
2、通过

idx = hash & d->ht[table].sizemask;

来计算具体在ht[0].table 哪个索引下面。

冲突

从源代码中dictAddRaw 函数里面就可以看出

    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;

如果将entry的next 赋值table[index]的原有值，再将table[index] 赋值为entry
这样粗暴简单，一切为了时间。

3、rehashing

初始的大小

#define DICT_HT_INITIAL_SIZE     4

两种情况会出现rehash

向字典中添加更多的key-value，就需要扩充散列数组的大小
字典原来有很多key，之后又删除了一部分key，为了节省内存，也会对该字典进行缩容。

dictRehash函数分N步进行渐进式 rehash。当旧哈希表中还有key没移动到新哈希表时，函数返回1，否则返回0。一次rehash过程包含把一个桶从旧哈希表移动到新哈希表（由于我们在同一个桶中使用链表形式保存key-value对，所以一个桶中可能有一个以上的key需要移动）。然而由于哈希表中可能有一部分是空的，并不能保证每一步能对至少一个桶进行rehash，因此我们规定一步中最多只能访问N*10个空桶，否则这么大量的工作可能会造成一段长时间的阻塞

/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table, however
 * since part of the hash table may be composed of empty spaces, it is not
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at max N*10 empty buckets in total, otherwise the amount of
 * work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0;
    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;
        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            uint64_t h;
            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }
    /* Check if we already rehashed the whole table... */
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }
    /* More to rehash... */
    return 1;

dictRehashMilliseconds函数在ms时间内rehash，超过则停止。

/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
int dictRehashMilliseconds(dict *d, int ms) {
    long long start = timeInMilliseconds();
    int rehashes = 0;
    while(dictRehash(d,100)) {
        rehashes += 100;
        if (timeInMilliseconds()-start > ms) break;
    }
    return rehashes;
}

进行渐进式 rehash 的过程中，字典会同时使用 ht[0] 和 ht[1] 两个哈希表，所以在渐进式 rehash 进行期间，字典的删除（delete）、查找（find）、更新（update）等操作会在两个哈希表上进行：比如说，要在字典里面查找一个键的话，程序会先在 ht[0] 里面进行查找，如果没找到的话，就会继续到 ht[1] 里面进行查找，诸如此类。

另外，在渐进式 rehash 执行期间，新添加到字典的键值对一律会被保存到 ht[1] 里面，而 ht[0] 则不再进行任何添加操作：这一措施保证了 ht[0] 包含的键值对数量会只减不增，并随着 rehash 操作的执行而最终变成空表。

4、总结

1、Redis 字典用了俩个Hash表，但是正常情况只会使用其中一个ht[0]
2、采用渐进式 rehash 来进行扩容，很大程度上降低了性能损耗。
3、时间复杂度对redis来说非常重要，在给Hash表添加数据就是一个比较好的体现。

5、问题

1、dictScan 通过巧妙的设计实现了无状态的遍历方法？后面仔细找找