redis ziplist(9)
2017-05-30 本文已影响134人
lmem
首先要了解设计思想!!!
在 Redis 中,list 有两种存储方式:双链表(LinkedList)和压缩双链表(ziplist)。双链 表即普通数据结构中遇到的,在 adlist.h 和 adlist.c 中实现。压缩双链表以连续的内存空间 来表示双链表,压缩双链表节省前驱和后驱指针的空间(8b),这在小的list 上,压缩效率 是非常明显的,因为一个普通的双链表中,前驱后驱指针在 64 位机器上需分别占用 8B; 压缩双链表在 ziplist.h 和 ziplist.c 中实现。这篇主要详述压缩双链表,普通双链表可以参看其他
在压缩双链表中,节省了前驱和后驱指针的空间,在 64 位机器上共节省了 8 个字节, 这让数据在内存中更为紧凑。只要清晰的描述每个数据项的边界,就可以轻易得到前驱后 驱数据项的位置,ziplist 就是这么做的。
1.ziplist 的数据格式
<zlbytes><zltail><zllen><entry>...<entry><zlend>
2. entry 的数据格式
|<prelen><<encoding+lensize><len>><data>|
|---1----------------2--------------3---|
prelen 1 或5 字节
<encoding+lensize><len> 1 或 5 字节
域 1)是前驱数据项的大小。因为不用描述前驱的数据类型,描述较为简单。
域 2)是此数据项的的类型和数据大小。为了节省空间,redis 预设定了多种长度的字符串 和整数。
3 种长度的字符串:
#define ZIP_STR_06B (0 << 6)
#define ZIP_STR_14B (1 << 6)
#define ZIP_STR_32B (2 << 6)
5 种长度的整数:
#define ZIP_INT_16B (0xc0 | 0<<4)
#define ZIP_INT_32B (0xc0 | 1<<4)
#define ZIP_INT_64B (0xc0 | 2<<4)
#define ZIP_INT_24B (0xc0 | 3<<4)
#define ZIP_INT_8B 0xfe
头部信息中的另一个值记录着编码的方式,当编码的是字符串,头部的前2位为00,01,10共3种
* 如果编码的是整型数字的时候,则头部的前2位为11,代表的是整数编码,后面2位代表什么类型整型值将会在头部后面被编码
* 00-int16_t, 01-int32_t, 10-int64_t, 11-24 bit signed,还有比较特殊的2个,11111110-8 bit signed,
* 1111 0000 - 1111 1101,代表的是整型值0-12,头尾都已经存在,都不能使用,与传统的通过固定的指针表示长度,这么做的好处实现
* 可以更合理的分配内存
*
* String字符串编码的3种形式
* |00pppppp| - 1 byte
* String value with length less than or equal to 63 bytes (6 bits).
* |01pppppp|qqqqqqqq| - 2 bytes
* String value with length less than or equal to 16383 bytes (14 bits).
* |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes
* String value with length greater than or equal to 16384 bytes.
* |11000000| - 1 byte
* Integer encoded as int16_t (2 bytes).
* |11010000| - 1 byte
* Integer encoded as int32_t (4 bytes).
* |11100000| - 1 byte
* Integer encoded as int64_t (8 bytes).
* |11110000| - 1 byte
* Integer encoded as 24 bit signed (3 bytes).
* |11111110| - 1 byte
* Integer encoded as 8 bit signed (1 byte).
* |1111xxxx| - (with xxxx between 0000 and 1101) immediate 4 bit integer.
* Unsigned integer from 0 to 12. The encoded value is actually from
* 1 to 13 because 0000 and 1111 can not be used, so 1 should be
* subtracted from the encoded 4 bit value to obtain the right value.
* |11111111| - End of ziplist.
*
3.插入操作
/* Insert item at "p". */
static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen;
unsigned int prevlensize, prevlen = 0;
size_t offset;
int nextdiff = 0;
unsigned char encoding = 0;
long long value = 123456789; /* initialized to avoid warning. Using a value
that is easy to see if for some reason
we use it uninitialized. */
zlentry tail;
/* Find out prevlen for the entry that is inserted. */
if (p[0] != ZIP_END) {
ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
} else {
unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);//获取尾部数据
if (ptail[0] != ZIP_END) {
prevlen = zipRawEntryLength(ptail);
}
}
/* See if the entry can be encoded */
if (zipTryEncoding(s,slen,&value,&encoding)) {
/* 'encoding' is set to the appropriate integer encoding */
reqlen = zipIntSize(encoding);
} else {
/* 'encoding' is untouched, however zipEncodeLength will use the
* string length to figure out how to encode it. */
reqlen = slen;
}
/* We need space for both the length of the previous entry and
* the length of the payload. */
reqlen += zipPrevEncodeLength(NULL,prevlen);
reqlen += zipEncodeLength(NULL,encoding,slen);
/* When the insert position is not equal to the tail, we need to
* make sure that the next entry can hold this entry's length in
* its prevlen field. */
// 确保下一个的prelen字段可以使用插入值
nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
/* Store offset because a realloc may change the address of zl. */
offset = p-zl;
zl = ziplistResize(zl,curlen+reqlen+nextdiff);//改变大小
p = zl+offset;//定位到p
/* Apply memory move when necessary and update tail offset. */
if (p[0] != ZIP_END) {
/* Subtract one because of the ZIP_END bytes */
//腾出位置
//dest src len
memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);
/* Encode this entry's raw length in the next entry. */
zipPrevEncodeLength(p+reqlen,reqlen);
/* Update offset for tail */
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);
/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
zipEntry(p+reqlen, &tail);
if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
}
} else {
/* This element will be the new tail. */
ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
}
/* When nextdiff != 0, the raw length of the next entry has changed, so
* we need to cascade the update throughout the ziplist */
if (nextdiff != 0) {
offset = p-zl;
zl = __ziplistCascadeUpdate(zl,p+reqlen);
p = zl+offset;
}
/* Write the entry */
//把前一个的lenth写入p,返回字节大小
p += zipPrevEncodeLength(p,prevlen);
//写入 slen,返回当前entry所占用的长度1,或5
p += zipEncodeLength(p,encoding,slen);
//写入数据,字符串直接拷贝,int需要单独判断长度
if (ZIP_IS_STR(encoding)) {
memcpy(p,s,slen);
} else {
zipSaveInteger(p,value,encoding);
}
ZIPLIST_INCR_LENGTH(zl,1);
return zl;
}
4. 查找
/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries
* between every comparison. Returns NULL when the field could not be found.
* |<prelen><<encoding+lensize><len>><data>|
* |---1----------------2--------------3---|*/
//skip 每隔skip个跳过一个
unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip) {
int skipcnt = 0;
unsigned char vencoding = 0;
long long vll = 0;
while (p[0] != ZIP_END) {//最后一个字节结束标识为111111111
unsigned int prevlensize, encoding, lensize, len;
unsigned char *q;
// prevlensize 1或者5
// |00pppppp| - 1 byte
//* String value with length less than or equal to 63 bytes (6 bits).
//* |01pppppp|qqqqqqqq| - 2 bytes
//* String value with length less than or equal to 16383 bytes (14 bits).
//* |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes
ZIP_DECODE_PREVLENSIZE(p, prevlensize);
//encoding 编码方式1字节 lensize len的字节大小 len 数据的长度
ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len);
//指针移动到数据位置
q = p + prevlensize + lensize;
if (skipcnt == 0) {//可以取数据判断了
/* Compare current entry with specified entry */
if (ZIP_IS_STR(encoding)) {
//字符串比较
if (len == vlen && memcmp(q, vstr, vlen) == 0) {
return p;
}
} else {
/* Find out if the searched field can be encoded. Note that
* we do it only the first time, once done vencoding is set
* to non-zero and vll is set to the integer value. */
if (vencoding == 0) {
//长度vlen,获取整数的编码vencoding,以及值vll
if (!zipTryEncoding(vstr, vlen, &vll, &vencoding)) {
/* If the entry can't be encoded we set it to
* UCHAR_MAX so that we don't retry again the next
* time. */
vencoding = UCHAR_MAX;
}
/* Must be non-zero by now */
assert(vencoding);
}
/* Compare current entry with specified entry, do it only
* if vencoding != UCHAR_MAX because if there is no encoding
* possible for the field it can't be a valid integer. */
if (vencoding != UCHAR_MAX) {
// 链表中的value
long long ll = zipLoadInteger(q, encoding);
if (ll == vll) {
return p;
}
}
}
/* Reset skip count */
skipcnt = skip;
} else {
/* Skip entry */
skipcnt--;
}
/* Move to next entry */
p = q + len;
}
return NULL;
}