您的位置：首页 > 数据库 > Redis

redis源码分析(2)----字典dict

2016-02-13 12:38 711 查看

1. dict的特点

　　字典dict采用hash表作为底层的存储结构。

　　1. hash表的长度保持为2的N次方，最大长度为LONG_MAX。

　　2. hash表采用链式法来解决hash值冲突。

　　3. dict数据结构中保存了两个hash表指针，用于实现rehash的过程。

　　4. 为了防止大数据量情况下rehash过程过分耗时，dict采用渐进式rehash，将rehash的过程分散到每一个增删改查的操作中，从而分摊耗时。

2. dict的定义

/* One hash table element: a key, its value, and a link to the next
 * element stored in the same bucket. */
typedef struct dictEntry {
void *key;
/* The value, stored either as a pointer or inline as an integer/double. */
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
struct dictEntry *next;     // next entry in this bucket's chain
} dictEntry;

/*
* Caller-supplied callbacks that customise a dictionary: hashing a key,
* duplicating (deep-copying) keys and values, comparing two keys, and
* destroying keys and values.
*/
typedef struct dictType {
unsigned int (*hashFunction)(const void *key);
void *(*keyDup)(void *privdata, const void *key);
void *(*valDup)(void *privdata, const void *obj);
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
void (*keyDestructor)(void *privdata, void *key);
void (*valDestructor)(void *privdata, void *obj);
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
* implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
dictEntry **table;              // array of bucket heads (one chain per bucket)
unsigned long size;             // number of buckets in table
unsigned long sizemask;         // index mask, set to size-1
unsigned long used;             // number of entries currently stored
} dictht;

// The dictionary itself. It holds two dictht tables so that entries can be
// migrated incrementally from ht[0] to ht[1] while a rehash is in progress.
typedef struct dict {
dictType *type;         // caller-supplied callbacks
// NOTE(review): presumably handed to the dictType callbacks as their
// 'privdata' argument -- confirm against the dictSetKey/dictCompareKeys macros.
void *privdata;
dictht ht[2];
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
unsigned long iterators; /* number of iterators currently running */
} dict;

　　dict结构图

3. dict创建

/* Allocate a new dictionary and initialise it with the given callback
 * table and private data pointer. Returns the new dictionary. */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *fresh;

    fresh = zmalloc(sizeof(dict));
    _dictInit(fresh, type, privDataPtr);
    return fresh;
}

/* Put an already allocated dictionary into its empty state: both hash
 * tables reset, no rehash in progress, no running iterators.
 * Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    int i;

    for (i = 0; i < 2; i++)
        _dictReset(&d->ht[i]);

    d->type = type;
    d->privdata = privDataPtr;
    d->rehashidx = -1;  /* -1 means "not rehashing" */
    d->iterators = 0;
    return DICT_OK;
}

4. dict插入新元素

　　字典增加新元素使用函数dictAdd，内部首先调用dictAddRaw根据key在合适的位置插入新的Entry，然后再设置value。

/* Insert a key/value pair into the dictionary.
 *
 * The key is inserted first through dictAddRaw(); the value is stored in
 * the returned entry afterwards. Returns DICT_OK on success, or DICT_ERR
 * when dictAddRaw() yields no entry. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *slot = dictAddRaw(d, key);

    if (slot != NULL) {
        dictSetVal(d, slot, val);
        return DICT_OK;
    }
    return DICT_ERR;
}

　

　 dictAddRaw的流程总共分为4步，其中比较重要的是step2和step3，step2用于获取key在hash表的下标，如果正在rehash过程中，则这个下标是ht[1]中的，否则就是在ht[0]中，相应的step3选择相应的ht来插入Entry

/* Low level insert: create an entry for 'key' and return it so the caller
 * can fill in the value. Returns NULL when _dictKeyIndex() reports -1
 * (for example because the key is already present). */
dictEntry *dictAddRaw(dict *d, void *key)
{
    int slot;
    dictht *target;
    dictEntry *node;

    /* Step 1: contribute one step of incremental rehashing if one is
     * in progress. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Step 2: find the bucket index for the new key. */
    slot = _dictKeyIndex(d, key);
    if (slot == -1)
        return NULL;

    /* Step 3: while rehashing, new entries always go to ht[1]; otherwise
     * to ht[0]. The entry is pushed at the head of the bucket chain, on
     * the assumption that recently added entries are accessed more
     * frequently. */
    if (dictIsRehashing(d))
        target = &d->ht[1];
    else
        target = &d->ht[0];

    node = zmalloc(sizeof(*node));
    node->next = target->table[slot];
    target->table[slot] = node;
    target->used++;

    /* Step 4: store the key in the entry. */
    dictSetKey(d, node, key);
    return node;
}

　　_dictKeyIndex用来返回key所在的hash表的下标，它首先会检查是否需要扩展hash表(执行Expand操作)。_dictKeyIndex是一个内部函数，不应该被外部直接调用。

/* Returns the index of a free slot that can be populated with
 * a hash entry for the given 'key'.
 * If the key already exists, -1 is returned. -1 is also returned when
 * _dictExpandIfNeeded() fails.
 *
 * Note that if we are in the process of rehashing the hash table, the
 * index is always returned in the context of the second (new) hash table,
 * because the loop below finishes on table 1 in that case. */
static int _dictKeyIndex(dict *d, const void *key)
{
    unsigned int h, idx, table;
    dictEntry *he;

    /* Step 1: grow the hash table first if needed. */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;

    /* Compute the key hash value. */
    h = dictHashKey(d, key);

    /* Step 2: look the key up in ht[0], and in ht[1] as well while
     * rehashing. */
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        /* Search if this slot does not already contain the given key. */
        he = d->ht[table].table[idx];
        while(he) {
            if (dictCompareKeys(d, key, he->key))
                return -1;
            he = he->next;
        }
        /* Not rehashing: only ht[0] is in use, so its index is the answer. */
        if (!dictIsRehashing(d)) break;
    }
    return idx;
}

　　

　　_dictExpandIfNeeded 会判断当前是否需要扩展字典dict并在需要的时候执行扩展操作，它也是一个内部函数，函数末尾的 if 语句给出了扩展的条件：

　　1.  如果dict_can_resize被设置并且 used/size >= 1，则扩展dict，请求的长度为used * 2（dictExpand会将其向上取整为2的N次方）；

　　2.  如果 used / size > dict_force_resize_ratio，则强制扩展操作 (dict_force_resize_ratio值为5)。

/* Expand the hash table if needed.
 *
 * Returns DICT_OK when nothing has to be done, otherwise whatever
 * dictExpand() returns. */
static int _dictExpandIfNeeded(dict *d)
{
    /* Step 1: incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* Step 2: if the hash table is empty expand it to the initial size. */
    if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    if (d->ht[0].used >= d->ht[0].size &&
        (dict_can_resize ||
         d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
    {
        return dictExpand(d, d->ht[0].used*2);
    }
    return DICT_OK;
}


　　dictExpand函数执行扩展操作，如果是扩展空的hash表，则直接扩展，如果ht[0]不为空，则开启渐进式rehash过程

/* Expand or create the hash table.
 *
 * 'size' is the requested number of buckets; the number actually used is
 * _dictNextPower(size). Returns DICT_ERR if a rehash is already running,
 * if 'size' is smaller than the number of stored elements, or if the
 * resulting table size equals the current one. Otherwise returns DICT_OK
 * after either installing the first table or starting an incremental
 * rehash into ht[1]. */
int dictExpand(dict *d, unsigned long size)
{
    /* Step 1: compute the real bucket count. */
    unsigned long buckets = _dictNextPower(size);
    dictht fresh; /* the new hash table */

    /* The size is invalid if it is smaller than the number of
     * elements already inside the hash table. */
    if (dictIsRehashing(d)) return DICT_ERR;
    if (d->ht[0].used > size) return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (buckets == d->ht[0].size) return DICT_ERR;

    /* Step 2: allocate the bucket array (zero-filled, so every chain
     * starts out as NULL) and fill in the bookkeeping fields. */
    fresh.table = zcalloc(buckets*sizeof(dictEntry*));
    fresh.size = buckets;
    fresh.sizemask = buckets-1;
    fresh.used = 0;

    /* Step 3: with an existing table, prepare the second table for
     * incremental rehashing; otherwise this is the first initialization
     * and the new table simply becomes ht[0]. */
    if (d->ht[0].table != NULL) {
        d->ht[1] = fresh;
        d->rehashidx = 0;
    } else {
        d->ht[0] = fresh;
    }
    return DICT_OK;
}


5. 删除元素
　　从dict中删除元素，注意如果在rehash的过程中，需要判断是否从ht[1]中删除

/* Search and remove an element.
 *
 * When 'nofree' is zero the key and value are released through
 * dictFreeKey()/dictFreeVal() before the entry is freed; when it is
 * non-zero only the entry itself is freed.
 * Returns DICT_OK if the key was found and removed, DICT_ERR otherwise. */
static int dictGenericDelete(dict *d, const void *key, int nofree)
{
    unsigned int h, idx;
    dictEntry *he, *prevHe;
    int table;

    if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);

    /* Look in ht[0], and in ht[1] as well while rehashing. */
    for (table = 0; table <= 1; table++) {
        /* Step 1: bucket index of the key in this table. */
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        prevHe = NULL;
        while(he) {
            /* Step 2: on a key match, unlink the entry and release it. */
            if (dictCompareKeys(d, key, he->key)) {
                /* Unlink the element from the list */
                if (prevHe)
                    prevHe->next = he->next;
                else
                    d->ht[table].table[idx] = he->next;
                if (!nofree) {
                    dictFreeKey(d, he);
                    dictFreeVal(d, he);
                }
                zfree(he);
                d->ht[table].used--;
                return DICT_OK;
            }
            prevHe = he;
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return DICT_ERR; /* not found */
}

/* Remove 'key' from the dictionary, releasing its key and value.
 * Returns the result of dictGenericDelete(). */
int dictDelete(dict *ht, const void *key)
{
    const int nofree = 0;

    return dictGenericDelete(ht, key, nofree);
}

/* Remove 'key' from the dictionary without releasing its key and value.
 * Returns the result of dictGenericDelete(). */
int dictDeleteNoFree(dict *ht, const void *key)
{
    const int nofree = 1;

    return dictGenericDelete(ht, key, nofree);
}

6. rehash过程
　　前面说到dict使用渐进式rehash方式，就是将rehash的过程平摊到每一个增删查改的操作中，在这些操作中，都可以看到这样两条语句： if (dictIsRehashing(d)) _dictRehashStep(d);

/* Perform a single step of rehashing, but only when no iterators are
 * currently running on the dictionary: moving entries between the two
 * tables in the middle of an iteration could make the iterator miss or
 * repeat elements.
 *
 * Called from the common lookup/update operations so that the table
 * migrates gradually while the dictionary is in use. */
static void _dictRehashStep(dict *d)
{
    if (d->iterators != 0)
        return;
    dictRehash(d, 1);
}


　　

　　dictRehash是真正执行rehash过程的函数

/* Perform up to 'n' steps of incremental rehashing, where one step moves
 * one non-empty bucket from ht[0] to ht[1]. At most n*10 empty buckets
 * are skipped per call.
 *
 * Returns 1 if there are still entries left in ht[0], 0 if no rehash is
 * in progress or the rehash has just been completed. */
int dictRehash(dict *d, int n) {
    int empty_budget = n*10; /* Max number of empty buckets to visit. */
    dictht *src = &d->ht[0];
    dictht *dst = &d->ht[1];

    if (!dictIsRehashing(d)) return 0;

    for (; n != 0 && src->used != 0; n--) {
        dictEntry *cur, *following;

        /* rehashidx can't run past the table: src->used != 0 guarantees
         * that a non-empty bucket is still ahead. */
        assert(src->size > (unsigned long)d->rehashidx);

        /* Step 1: skip consecutive empty buckets, within the budget. */
        while (src->table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_budget == 0) return 1;
        }

        /* Step 2: move every entry of this bucket to the head of its
         * bucket in the new table. */
        for (cur = src->table[d->rehashidx]; cur != NULL; cur = following) {
            unsigned int slot;

            following = cur->next;
            /* Index of the entry in the new hash table. */
            slot = dictHashKey(d, cur->key) & dst->sizemask;
            cur->next = dst->table[slot];
            dst->table[slot] = cur;
            src->used--;
            dst->used++;
        }

        /* Step 3: the old bucket is now empty. */
        src->table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* More to rehash... */
    if (src->used != 0) return 1;

    /* Step 4: everything has been moved. Release the old bucket array,
     * promote ht[1] to ht[0] and mark the rehash as finished. */
    zfree(src->table);
    *src = *dst;
    _dictReset(dst);
    d->rehashidx = -1;
    return 0;
}


7. hash函数
　　dict中使用的hash函数有3种：

　　1.   Thomas Wang's 32bit整数hash函数。

　　2.   MurmurHash2 hash函数

　　3.   大小写不敏感的hash函数，基于djb hash。

/* -------------------------- hash functions -------------------------------- */

/* Thomas Wang's 32 bit Mix Function.
 *
 * Scrambles an integer key through a fixed sequence of shift, add and
 * xor steps and returns the mixed value. All arithmetic is unsigned, so
 * it wraps around instead of overflowing. */
unsigned int dictIntHashFunction(unsigned int key)
{
    unsigned int mixed = key;

    mixed = mixed + ~(mixed << 15);
    mixed = mixed ^ (mixed >> 10);
    mixed = mixed * 9u;             /* same as mixed + (mixed << 3) */
    mixed = mixed ^ (mixed >> 6);
    mixed = mixed + ~(mixed << 11);
    mixed = mixed ^ (mixed >> 16);
    return mixed;
}

/* Seed mixed into the string hash functions; 5381 until changed through
 * dictSetHashFunctionSeed(). */
static uint32_t dict_hash_function_seed = 5381;

/* Replace the seed used by the hash functions. */
void dictSetHashFunctionSeed(uint32_t seed)
{
    dict_hash_function_seed = seed;
}

/* Return the seed currently used by the hash functions. */
uint32_t dictGetHashFunctionSeed(void)
{
    return dict_hash_function_seed;
}

/* MurmurHash2, by Austin Appleby.
 *
 * Hashes the 'len' bytes starting at 'key', mixing in the global
 * dict_hash_function_seed, and returns the 32 bit hash.
 *
 * Each 4-byte group is assembled byte by byte into a local word, so 'key'
 * may have any alignment and no object is accessed through an
 * incompatible pointer type. The bytes keep their in-memory order, which
 * gives the same value as a direct native 32 bit load.
 *
 * Limitations:
 *
 * 1. It will not work incrementally.
 * 2. It will not produce the same results on little-endian and big-endian
 *    machines.
 */
unsigned int dictGenHashFunction(const void *key, int len) {
    /* 'm' and 'r' are mixing constants generated offline.
     They're not really 'magic', they just happen to work well.  */
    uint32_t seed = dict_hash_function_seed;
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    /* Initialize the hash to a 'random' value */
    uint32_t h = seed ^ len;

    /* Mix 4 bytes at a time into the hash */
    const unsigned char *data = (const unsigned char *)key;

    while(len >= 4) {
        uint32_t k;
        unsigned char *kbytes = (unsigned char *)&k;

        /* Native-order load without a misaligned or aliasing access. */
        kbytes[0] = data[0];
        kbytes[1] = data[1];
        kbytes[2] = data[2];
        kbytes[3] = data[3];

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    /* Handle the last few bytes of the input array  */
    switch(len) {
    case 3: h ^= data[2] << 16; /* fall through */
    case 2: h ^= data[1] << 8;  /* fall through */
    case 1: h ^= data[0]; h *= m;
            break;
    default:
            break;
    }

    /* Do a few final mixes of the hash to ensure the last few
     * bytes are well-incorporated. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return (unsigned int)h;
}

/* Case insensitive hash function (based on djb hash).
 *
 * Starts from the global dict_hash_function_seed and folds in the
 * lower-cased form of each of the 'len' bytes at 'buf'. */
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
    unsigned int hash = (unsigned int)dict_hash_function_seed;
    const unsigned char *cursor = buf;
    int remaining = len;

    while (remaining-- != 0) {
        /* hash * 33 + c */
        hash = hash * 33u + (unsigned int)tolower(*cursor);
        cursor++;
    }
    return hash;
}

                        
                        内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理  
                    

                        
                         标签： 
                                                
                        
                    

                相关文章推荐
                
                
                    
                                                                        集群 Redis 使用实践
                                                通过注册表查询Visual C++ Redistributable Package是否安装
                                                Redis首页
                                                Redis文档
                                                Redis社区
                                                Redis下载
                                                Redis支持
                                                Linux环境搭建nginx+2个tomcat+2个redis（主从复制）
                                                redis的主配置文件说明
                                                Windows开发必备素质--何为Visual C++ Redistributable Package？
                                                java基础之连接redis
                                                C++自制Redis 数据库（九） 详细数据库存储结构，线程相关已解决
                                                Redis入门很简单之八【Spring Data Redis初探】
                                                Redis入门很简单之七【使用Jedis实现客户端Sharding】
                                                Redis入门很简单之六【Jedis常见操作】
                                                Redis入门很简单之五【Jedis和Spring的整合】
                                                Redis入门很简单之四【初识Jedis】
                                                Redis入门很简单之三【常见参数配置】
                                                Redis入门很简单之二【常见操作命令】
                                                Redis入门很简单之一【简介与环境搭建】
                                                                    
                
            

            
            
            
                
                    新的分享
                    
                        
                                                        一次教科书级别的Redis高可用架构设计实践 - Redis
                                                        谁说Redis不能存大key
                                                        Redis存储商品热度
                                                        Redis的Java客户端
                                                        redis高可用、redis集群、redis缓存优化
                                                        redis的Linux系统安装与配置、redis的api使用、高级用法之慢查询、pipline事物
                                                        为什么 Redis 要有哨兵机制？
                                                        【Java面试】Redis存在线程安全问题吗？为什么？
                                                        02 Springboot整合redis
                                                        【面试普通人VS高手系列】Redis和Mysql如何保证数据一致性
                                                        Redis 内存满了怎么办？这样设置才正确！
                                                        如何使用 Redis 缓存
                                                    
                    
                
            

            
                
                    章节导航
                    
                        
                        
                        
                    
                
            
        

    
    
        
        
    
     
    
    
    添加评论
    
    
    分享网址
    
    
    分享文章
    
    
    返回顶部
    
    


    
        https://www.geek-share.com/ 
        GeekShare·极客分享·来自极客的分享  © 2016-2019 ·  
        粤ICP备17045047号