Redis的存储结构笔记

2020-08-17 本文已影响0人 365_9163

hash：给定一个字符串或者其他任意的值X，通过hash函数得到一个散列值；hash表的意思就是建立一个数组，

问题：通过索引（hash值）去读取hash表，hash表会非常大，占用内存非常庞大；通过hash遍历hash表，k-v数量多，查找性能会降到O(N)，时间性能也很低。

redis的hash表：

typedef struct dictht {

dictEntry **table;// 一个数组，数组中每一个元素都有一个指向dictEntry的指针，每个dictEntry结构保存着一个键值对。

unsigned long size; //哈希表的大小，也就是table表的大下。

unsigned long sizemask; //值总是等于size-1，这个值和hash值一起决定键应该被放在哪个索引上面。

unsigned long used;//表示 hash表中已有的节点的数量

} dictht;

进行存储的时候直接使用hash&sizemask就可以得到相应的index，这index值范围0~size-1，也就是0-sizemask；

hash表的节点：

typedef struct dictEntry {

void *key; // 键保存键值对中的键，v保存键值对中的值，值可以使一个指针，或者一个整数或者double类型。

// 值

union {

void *val;

uint64_t u64;

int64_t s64;

double d; } v;

struct dictEntry *next; // 指向下个哈希表节点，形成链表，将多个哈希值相同的键值对连接在一起，解决键冲突的问题。

} dictEntry;

redis字典 dict ：

typedef struct dict {

dictType *type; // 字典类型指向dictType结构的指针，每个dictType结构保存了一组操作特定类型键值对的函数

void *privdata; //保存了需要传给那些类型特定函数的可选参数

dictht ht[2]; // 数组中每个项都是一个dictht哈希表，字典使用ht[0]，ht[1]哈希表在对ht[0]rehash时使用。

long rehashidx; //rehash的位置，如果值不等于-1 说明正在rehash中。

unsigned long iterators; // hash表的迭代器，一般用于rehash和主从复制等等

} dict;

redis如何解决时间效率和空间效率的？

初始化时，字典的hash表大小是4（sizemask是3），通过hash计算出的hash值可能很大，hash值&sizemask，得以存放在表中。hash表是随着k-v数量的增加而逐步增大的，并不直接以hash值为下标去取值，以index=hash值&sizemask 获取下标，取得对应节点，节点是个链表。

ratio=ht[0].used/ht[0].size;

当ratio>=1并且没有进行主从复制和持久化 ,进行扩容，主从复制和持久化时 ratio>5 会进行扩容（避免系统负载高）；

当ratio<0.1进行缩容。

可以看出，并不是开始申请大量内存，而是渐进式扩容或者缩容。

扩容步骤;

1为字典ht[1]哈希表分配合适的空间（扩容的大小为大于size的2的n次方。位运算方便。）2将ht[0]中所有键值对rehash到ht[1]。3

当ht[0] 包含的所有键值对都迁移到ht[1]之后,释放ht[0],将ht[1]设置为ht[0],并在ht[1]新建一个空白哈希表，为下次rehash准备。

渐进式rehash执行流程源码：

aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL)//创建定时器回调，增量处理许多后台定时器的方法：过期键，rehash等

//定时器中断，每秒调用；异步完成许多事情

serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {

//省略其他代码

.......

/* Handle background operations on Redis databases. */

redis数据库后台操作。

databasesCron();

}

databasesCron函数与rehash相关代码，其他代码省略了

void databasesCron(void){

int dbs_per_call = CRON_DBS_PER_CALL; //16

.........

/* Rehash */

if (server.activerehashing) {

for (j = 0; j < dbs_per_call; j++) {

int work_done = incrementallyRehash(rehash_db); //对每个数据库进行渐进式rehash，如果返回1，执行时间到了直接跳出，下次继续进行操作，如果返回0，说明此数据库已经完成，进行下一个数据库继续进行操作。

if (work_done) {

/* If the function did some work, stop here, we'll do more at the next cron loop. */

break;

} else {

/* If this db didn't need rehash, we'll try the next one. */

rehash_db++;

rehash_db %= server.dbnum;

}

..............

}

调用incrementallyRehash函数执行渐进式rehash，看此函数中逻辑代码

int incrementallyRehash(int dbid) {

检测数据库是否进行rehash操作中（rehashidx值是否为-1），没有进行rehash 执行其他代码。

if (dictIsRehashing(server.db[dbid].dict)) {

dictRehashMilliseconds(server.db[dbid].dict,1);//进行对dbid索引数据库进行时长为1毫秒的rehash操作，返回-1，下次继续

return 1; /* already used our millisecond for this loop... */

}

......

return 0;

}

进入dictRehashMilliseconds函数看实现逻辑。

/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */

int dictRehashMilliseconds(dict *d, int ms) {

long long start = timeInMilliseconds();

int rehashes = 0;

while(dictRehash(d,100)) {// 每次循环进行100个桶的rehash操作，直到用时超过1毫秒便返回。

rehashes += 100;

if (timeInMilliseconds()-start > ms) break;

}

return rehashes;

}

到dictRehash函数这才是真怎执行rehash操作的函数。

int dictRehash(dict *d, int n) {

int empty_visits = n*10; /* Max number of empty buckets to visit. */

if (!dictIsRehashing(d)) return 0;

while(n-- && d->ht[0].used != 0) {

dictEntry *de, *nextde;

/* Note that rehashidx can't overflow as we are sure there are more elements because ht[0].used != 0 */

assert(d->ht[0].size > (unsigned long)d->rehashidx);

while(d->ht[0].table[d->rehashidx] == NULL) {

d->rehashidx++;

if (--empty_visits == 0) return 1;

}

de = d->ht[0].table[d->rehashidx];

/* Move all the keys in this bucket from the old to the new hash HT 从ht[0]中移动桶中的所有keys到ht[1]中*/

while(de) {

uint64_t h;

nextde = de->next;

/* Get the index in the new hash table */

h = dictHashKey(d, de->key) & d->ht[1].sizemask;

de->next = d->ht[1].table[h];

d->ht[1].table[h] = de;

d->ht[0].used--;

d->ht[1].used++;

de = nextde;

}

d->ht[0].table[d->rehashidx] = NULL;

d->rehashidx++;

}

/* Check if we already rehashed the whole table... 整个表都已经rehash完成，返回0，used是表中节点个数*/

if (d->ht[0].used == 0) {

zfree(d->ht[0].table);

d->ht[0] = d->ht[1];

_dictReset(&d->ht[1]);

d->rehashidx = -1;

return 0;

}

/* More to rehash... */

return 1;

}

以上就为rehash流程。

rehash时新的数据插入到哪里？

插入数据调用的函数为 dictAdd()，源码如下

/* Add an element to the target hash table */

int dictAdd(dict *d, void *key, void *val)

{

dictEntry *entry = dictAddRaw(d,key,NULL);

if (!entry) return DICT_ERR;

dictSetVal(d, entry, val);

return DICT_OK;

}

调用dictAddRaw进行插入

dictAddRaw(dict *d, void *key, dictEntry **existing){

ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0]; //哈希表的选择，如果正在rehash操作，新的数据将会插入到ht[1]中。

}

rehash时查询数据是在哪个表中查的？

查询时候是在两个中都进行查询，源码如下。

dictEntry *dictFind(dict *d, const void *key)

{

dictEntry *he;

uint64_t h, idx, table;

if (dictSize(d) == 0) return NULL; /* dict is empty */

if (dictIsRehashing(d)) _dictRehashStep(d);

h = dictHashKey(d, key);

for (table = 0; table <= 1; table++) { //两个表循环遍历进行数据查询

idx = h & d->ht[table].sizemask;

he = d->ht[table].table[idx];

while(he) {

if (key==he->key || dictCompareKeys(d, key, he->key))

return he;

he = he->next;

}

if (!dictIsRehashing(d)) return NULL;

}

return NULL;

缩容按照下面计算：ratio值小于0.1时进行

resize :size>4 && used*100/size < 10;

Redis的存储结构笔记

redis如何解决时间效率和空间效率的？

扩容步骤;

rehash时新的数据插入到哪里？

rehash时查询数据是在哪个表中查的？

猜你喜欢

热点阅读