Redis源代码分析之二：散列表——Dict（上） , ACE网络编程开发|开源框架

winston 发表于 2012-2-2 19:28:33

Redis源代码分析之二：散列表——Dict（上）

先介绍Redis散列表实现的几个重要数据结构：

字典项DictEntry：typedef struct dictEntry {

void *key;
void *val;
struct dictEntry *next;
} dictEntry;
字典类型DictType：typedef struct dictType {
unsigned int (*hashFunction)(const void *key);
void *(*keyDup)(void *privdata, const void *key);
void *(*valDup)(void *privdata, const void *obj);
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
void (*keyDestructor)(void *privdata, void *key);
void (*valDestructor)(void *privdata, void *obj);
} dictType;
哈希表结构dictht：/* This is our hash table structure. Every dictionary has two of this as we
* implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
dictEntry **table;
unsigned long size;
unsigned long sizemask;
unsigned long used;
} dictht;

字典结构dict：typedef struct dict {

dictType *type;
void *privdata;
dictht ht;
int rehashidx; /* rehashing not in progress if rehashidx == -1 */
int iterators; /* number of iterators currently running */
} dict;

字典迭代器dictIterator：/* If safe is set to 1 this is a safe iteartor, that means, you can call
* dictAdd, dictFind, and other functions against the dictionary even while
* iterating. Otherwise it is a non safe iterator, and only dictNext()
* should be called while iterating. */
typedef struct dictIterator {
dict *d;
int table, index, safe;
dictEntry *entry, *nextEntry;
} dictIterator;
然后介绍一下字典接口定义：dict *dictCreate(dictType *type, void *privDataPtr);
int dictExpand(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
int dictReplace(dict *d, void *key, void *val);
int dictDelete(dict *d, const void *key);
int dictDeleteNoFree(dict *d, const void *key);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
void *dictFetchValue(dict *d, const void *key);
int dictResize(dict *d);
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
void dictPrintStats(dict *d);
unsigned int dictGenHashFunction(const unsigned char *buf, int len);
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d);
void dictEnableResize(void);
void dictDisableResize(void);
int dictRehash(dict *d, int n);
int dictRehashMilliseconds(dict *d, int ms);

下面分析扩张或者创建哈希表的重要函数dictExpand：

/* Expand or create the hashtable */
int dictExpand(dict *d, unsigned long size)
{
dictht n; /* the new hashtable */
unsigned long realsize = _dictNextPower(size);

/* the size is invalid if it is smaller than the number of
* elements already inside the hashtable */
if (dictIsRehashing(d) || d->ht.used > size)
   return DICT_ERR;

/* Allocate the new hashtable and initialize all pointers to NULL */
n.size = realsize;
n.sizemask = realsize-1;
n.table = zcalloc(realsize*sizeof(dictEntry*));
n.used = 0;

/* Is this the first initialization? If so it's not really a rehashing
* we just set the first hash table so that it can accept keys. */
if (d->ht.table == NULL) {
   d->ht = n;
   return DICT_OK;
}

/* Prepare a second hash table for incremental rehashing */
d->ht = n;
d->rehashidx = 0;
return DICT_OK;
}
对于传入的参数：新哈希表的大小size，首先调用内部函数_dictNextPower(size)取得大于size的最小2次幂整数，作为哈希表大小。掩码sizemask为size二进制表示长度减一的全1表示。调用内存管理函数zcalloc分配新哈希表的内存。
接下来，函数判断这是否是哈希表的首次初始化，这通过判断字典的哈希表数组ht的首个元素的dictEntry是否为空实现，如果为空，说明是首次初始化，则将该哈希表的size设为n，直接返回DICT_OK；否则，说明这是一次rehash，那么函数将准备第二个哈希表d->ht，并将d的rehashidx设为0，准备进行后续的增量哈希，然后返回DICT_OK。
下面分析再哈希的实现dictRehash函数：/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
* Note that a rehashing step consists in moving a bucket (that may have more
* thank one key as we use chaining) from the old to the new hash table. */
int dictRehash(dict *d, int n) {
if (!dictIsRehashing(d)) return 0;

while(n--) {
   dictEntry *de, *nextde;

   /* Check if we already rehashed the whole table... */
   if (d->ht.used == 0) {
         zfree(d->ht.table);
         d->ht = d->ht;
         _dictReset(&d->ht);
         d->rehashidx = -1;
         return 0;
   }

   /* Note that rehashidx can't overflow as we are sure there are more
      * elements because ht.used != 0 */
   while(d->ht.table == NULL) d->rehashidx++;
   de = d->ht.table;
   /* Move all the keys in this bucket from the old to the new hash HT */
   while(de) {
         unsigned int h;

         nextde = de->next;
         /* Get the index in the new hash table */
         h = dictHashKey(d, de->key) & d->ht.sizemask;
         de->next = d->ht.table;
         d->ht.table = de;
         d->ht.used--;
         d->ht.used++;
         de = nextde;
   }
   d->ht.table = NULL;
   d->rehashidx++;
}
return 1;
}
首先判断这是否是一次合法的rehash调用，通过判断(ht)->rehashidx!=-1实现。
然后，进行n步rehash。其中的每一步都重复如下步骤：
(1) 检查我们是否已经rehash了整个哈希表（此时d->ht.used为0），如果是，析构旧的哈希表，将d->rehashidx置为-1。
(2)遍历哈希表d->ht，直到找到非空的字典项de，然后此后通过de->next继续遍历。在此之前，通过位操作dictHashKey(d, de->key) & d->ht.sizemask获得在新哈希表d->ht中的索引h，并将该字典项复制到新哈希表d->ht，同时更新两个哈希表的d->ht.used计数。然后将最初de对应的rehashidx对应的字典项标记为NULL，将->rehashidx加1，然后重复(1)层的循环。

作者：Aegeaner 发表于2012-2-1 20:16:48 原文链接

页: [1]

ACE Developer's Archiver

Redis源代码分析之二：散列表——Dict（上）