sqlite3 hashCode实现
sqlite3 hashCode实现
如果把一个keywords数组,转成hashtable? 借助几个数组,可以快速返回Key
思路就是:keywords数组转成hashtable。
hash冲突的的处理:利用链地址法,处理hash冲突。
空间优化,借组prefix/longgestsuffix等属性,压缩character大数组(keywords展开后的值,用来匹配keyword)
具体实现:
三个比较函数,对应keywords的排序
/*
** Comparision function for two Keyword records
*/
static int keywordCompare1(const void *a, const void *b){
const Keyword *pA = (Keyword*)a;
const Keyword *pB = (Keyword*)b;
int n = pA->len- pB->len;
if( n==0 ){
n = strcmp(pA->zName, pB->zName);
}
assert( n!=0 );
return n;
}
static int keywordCompare2(const void *a, const void *b){
const Keyword *pA = (Keyword*)a;
const Keyword *pB = (Keyword*)b;
int n = pB->longestSuffix- pA->longestSuffix;
if( n==0 ){
n = strcmp(pA->zName, pB->zName);
}
assert( n!=0 );
return n;
}
static int keywordCompare3(const void *a, const void *b){
const Keyword *pA = (Keyword*)a;
const Keyword *pB = (Keyword*)b;
int n = pA->offset- pB->offset;
if( n==0 ) n = pB->id - pA->id;
assert( n!=0 );
return n;
}
1. 分别计算每个keyword的hash值,len
/* Fill in the lengths of strings and hashes for all entries. */
2. 按照len进行keywords排序
/* Sort the table from shortest to longest keyword */
qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1);
3. 检查是否存在内嵌的keywords
/* Look for short keywords embedded in longer keywords */
4. 计算最长的longestSuffix值
/* Compute the longestSuffix value for every word */
5.排序基于longestSuffix
/* Sort the table into reverse order by length */
6. 计算offset,同时会处理longestSuffix,阶段keywords等
/* Fill in the offset for all entries */
7. 排序基于offset
/* Sort the table by offset */
qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3);
8. 计算最有hashtable大小
/* Figure out how big to make the hash table in order to minimize the
** number of collisions */
// the bestCount is the first prime number which bigger than nKeyword ??
// calculate the bestCount by try over and over
一般是取大于nKeyword的最小素数,这里在{n/2-2n}之间计算一下,一个最有值。
9. 计算hash,并设置链地址(iNext)
/* Compute the hash */
for(i=0; i
for(i=0; i
h = aKeywordTable[i].hash % bestSize;
aKeywordTable[i].iNext = aHash[h];
aHash[h] = i+1;
}
10. 生成代码
/* Begin generating code */
生成代码(缩略)
static int keywordCode(const char *z, int n){
1. zText,展开后的字节数组,用来匹配
static const char zText[896] = {...};
2. aHash,一级hash表
static const unsigned char aHash[127] = {...};
3. aNext,链地址数组(使用下表作为地址,这里使用的是相对地址)
static const unsigned char aNext[200] = {...};
4. aLen,关键之长度
static const unsigned char aLen[200] = {...};
5. aOffset 在字节数组中的起始偏移
static const unsigned short int aOffset[200] = {...};
6. 对应的keyword表示
static const unsigned char aCode[200] = {...};
7. hash查找函数,主要内容
int h, i;
if( n<2 ) return TK_ID;
// 计算hash值
h = ((charMap(z[0])*4) ^
(charMap(z[n-1])*3) ^
n) % 127;
//如果冲突,使用aNext匹配下一个
for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){
if( aLen[i]==n && sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){
return aCode[i];
}
}
return TK_ID;
}