您的位置:首页 > 数据库 > Redis

结合redis设计与实现的redis源码学习-4-dict(字典)

2017-09-28 22:51 851 查看
字典,又称符号表,关联数组或者映射,是一种用于保存键值对的抽象数据结构,要保证的就是在字典中每个键都是独一无二的,在字典中根据键来寻找键值对。 

redis的数据库就是使用字典作为底层实现的,对数据库的增删改查也是建立在对字典的操作之上。 

redis的字典使用哈希表作为底层实现,一个哈希表里面可以有多个哈希表节点,而每个哈希表节点就保存了字典中的一个键值对。 

之前我只是了解字典的实现方式和原理,但是自己没有实现过,所以这次会仔细的看一下redis的实现方式,顺便手敲一遍。 

redis的字典具有以下特性: 

1、使用MurmurHash2算法来计算键的哈希值; 

2、哈希表使用链地址法来解决键冲突,被分配到同一索引的多个键值对会连成一个单向链表; 

3、对哈希表进行扩展或收缩时,会将现有哈希表的所有键值对rehash到新哈希表里,这个过程是渐进式的完成的。 

先来认识一下关键的结构体,在dict.h中定义:
// Hash table
typedef struct dictht{
dictEntry **table;// array of buckets (each bucket is a chain of entries)
unsigned long size;// number of buckets in the table
unsigned long sizemask;// mask used to turn a hash into a bucket index; size-1 for an allocated table
unsigned long used;// number of entries currently stored in the table
}dictht;
/* Hash table node: one key/value pair plus the link to the next node that
 * landed in the same bucket. */
typedef struct dictEntry{
    void *key;              /* key */
    union{                  /* value: a pointer or an inline 64-bit integer */
        void *val;
        uint64_t u64;
        int64_t s64;
    }v;
    struct dictEntry *next; /* next node in the same bucket (singly linked chain) */
}dictEntry;
/* Dictionary */
typedef struct dict{
    dictType *type;  /* type-specific callbacks */
    void *privdata;  /* private data handed to those callbacks */
    dictht ht[2];    /* hash tables; normally only ht[0] is used, ht[1] is the rehash target */
    int rehashidx;   /* rehash index; -1 when no rehash is in progress */
}dict;
/* Type-specific callbacks used by a dictionary. */
typedef struct dictType{
    unsigned int (*hashFunction)(const void *key);                           /* computes the hash of a key */
    void *(*keyDup)(void *privdata, const void *key);                        /* duplicates a key */
    void *(*valDup)(void *privdata, const void *obj);                        /* duplicates a value */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);   /* compares two keys */
    void (*keyDestructor)(void *privdata, void *key);                        /* destroys a key */
    void (*valDestructor)(void *privdata, void *obj);                        /* destroys a value */
}dictType;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32

上代码:
/* Hash Tables Implementation.*/
#include <stdint.h>

#ifndef __DICT_H
#define __DICT_H
/* Status codes returned by the dict API: success and failure. */
#define DICT_OK 0
#define DICT_ERR 1

/* Unused arguments generate annoying warnings... */
#define DICT_NOTUSED(V) ((void)(V))
// Hash table node: a key, its value (pointer, 64-bit integer or double),
// and the link to the next node in the same bucket
typedef struct dictEntry {
void *key;
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
struct dictEntry *next;
} dictEntry;
// Type-specific callbacks: hashing, key/value duplication, key comparison,
// and key/value destruction
typedef struct dictType {
unsigned int (*hashFunction)(const void *key);
void *(*keyDup)(void *privdata, const void *key);
void *(*valDup)(void *privdata, const void *obj);
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
void (*keyDestructor)(void *privdata, void *key);
void (*valDestructor)(void *privdata, void *obj);
} dictType;
// Hash table: bucket array, bucket count, index mask and number of stored entries
typedef struct dictht {
dictEntry **table;
unsigned long size;
unsigned long sizemask;
unsigned long used;
} dictht;
// Dictionary: callbacks, private data for them, and two hash tables
// (ht[1] is the target table while an incremental rehash is running)
typedef struct dict {
dictType *type;
void *privdata;
dictht ht[2];
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
int iterators; /* number of iterators currently running */
} dict;

/* If safe is set to 1 this is a safe iterator, that means, you can call
* dictAdd, dictFind, and other functions against the dictionary even while
* iterating. Otherwise it is a non safe iterator, and only dictNext()
* should be called while iterating. */
typedef struct dictIterator {
dict *d;// dictionary being iterated
long index;// current bucket index (-1 until dictNext() is first called)
int table, safe;// which of ht[0]/ht[1] is being walked; safe != 0 marks a safe iterator
dictEntry *entry, *nextEntry;// entry last returned and its saved successor in the chain
/* unsafe iterator fingerprint for misuse detection. */
long long fingerprint;
} dictIterator;
// Callback type invoked by dictScan() once for every entry it visits
typedef void (dictScanFunction)(void *privdata, const dictEntry *de);

/* This is the initial size of every hash table */
#define DICT_HT_INITIAL_SIZE     4

/* ------------------------------- Macros ------------------------------------*/
/* Call the type's value destructor on the entry's value, if one is set.
 * Wrapped in do { } while(0) so that the macro is a single statement and a
 * following `else` cannot bind to the macro's internal `if`. */
#define dictFreeVal(d, entry) do { \
    if ((d)->type->valDestructor) \
        (d)->type->valDestructor((d)->privdata, (entry)->v.val); \
} while(0)
/* Store a value in the entry: through the type's valDup callback when one is
 * set, otherwise the pointer itself is stored. */
#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
        (entry)->v.val = (d)->type->valDup((d)->privdata, (_val_)); \
    else \
        (entry)->v.val = (_val_); \
} while(0)
/* Store a signed 64-bit integer in the entry's value union. */
#define dictSetSignedIntegerVal(entry, _val_) \
    do { (entry)->v.s64 = (_val_); } while(0)
/* Store an unsigned 64-bit integer in the entry's value union. */
#define dictSetUnsignedIntegerVal(entry, _val_) \
    do { (entry)->v.u64 = (_val_); } while(0)
/* Store a double in the entry's value union. */
#define dictSetDoubleVal(entry, _val_) \
    do { (entry)->v.d = (_val_); } while(0)
/* Call the type's key destructor on the entry's key, if one is set.
 * Wrapped in do { } while(0) so that the macro is a single statement and a
 * following `else` cannot bind to the macro's internal `if`. */
#define dictFreeKey(d, entry) do { \
    if ((d)->type->keyDestructor) \
        (d)->type->keyDestructor((d)->privdata, (entry)->key); \
} while(0)
/* Store a key in the entry: through the type's keyDup callback when one is
 * set, otherwise the pointer itself is stored. */
#define dictSetKey(d, entry, _key_) do { \
    if ((d)->type->keyDup) \
        (entry)->key = (d)->type->keyDup((d)->privdata, (_key_)); \
    else \
        (entry)->key = (_key_); \
} while(0)
// Compare two keys: use the type's keyCompare callback when one is set,
// otherwise compare the two key pointers for equality
#define dictCompareKeys(d, key1, key2) \
(((d)->type->keyCompare) ? \
(d)->type->keyCompare((d)->privdata, key1, key2) : \
(key1) == (key2))

#define dictHashKey(d, key) (d)->type->hashFunction(key)// hash a key with the type's hash callback
#define dictGetKey(he) ((he)->key)// key stored in an entry
#define dictGetVal(he) ((he)->v.val)// pointer member of an entry's value union
#define dictGetSignedIntegerVal(he) ((he)->v.s64)// signed 64-bit member of an entry's value union
#define dictGetUnsignedIntegerVal(he) ((he)->v.u64)// unsigned 64-bit member of an entry's value union
#define dictGetDoubleVal(he) ((he)->v.d)// double member of an entry's value union
#define dictSlots(d) ((d)->ht[0].size+(d)->ht[1].size)// total number of buckets across both tables
#define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)// total number of stored entries across both tables
#define dictIsRehashing(d) ((d)->rehashidx != -1)// true while an incremental rehash is in progress

/* API */
dict *dictCreate(dictType *type, void *privDataPtr);// allocate and initialize an empty dict
int dictExpand(dict *d, unsigned long size);// create the table, or start rehashing into one sized for `size`
int dictAdd(dict *d, void *key, void *val);// add a key/value pair; DICT_ERR if the key already exists
dictEntry *dictAddRaw(dict *d, void *key);// add an entry holding only the key; NULL if the key already exists
int dictReplace(dict *d, void *key, void *val);// add the pair, or overwrite the value of an existing key (1 = added, 0 = replaced)
dictEntry *dictReplaceRaw(dict *d, void *key);// return the existing entry for the key, or a newly added one
int dictDelete(dict *d, const void *key);// remove an entry, calling the key/value destructors
int dictDeleteNoFree(dict *d, const void *key);// remove an entry without calling the key/value destructors
void dictRelease(dict *d);// clear both tables and free the dict
dictEntry * dictFind(dict *d, const void *key);// look up the entry for a key; NULL if absent
void *dictFetchValue(dict *d, const void *key);// value stored for a key; NULL if absent
int dictResize(dict *d);// resize the table to the smallest size that holds all entries
dictIterator *dictGetIterator(dict *d);// create a non-safe iterator
dictIterator *dictGetSafeIterator(dict *d);// create a safe iterator
dictEntry *dictNext(dictIterator *iter);// advance the iterator; NULL when iteration is finished
void dictReleaseIterator(dictIterator *iter);// free an iterator
dictEntry *dictGetRandomKey(dict *d);// return a random entry; NULL if the dict is empty
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);// collect up to `count` entries starting from random buckets
void dictGetStats(char *buf, size_t bufsize, dict *d);// write human readable statistics into buf
unsigned int dictGenHashFunction(const void *key, int len);// seeded hash of `len` bytes at `key`
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);// seeded case-insensitive hash
void dictEmpty(dict *d, void(callback)(void*));// remove every entry from both tables
void dictEnableResize(void);// allow table resizing
void dictDisableResize(void);// disallow table resizing (forced growth still applies)
int dictRehash(dict *d, int n);// perform up to n bucket-migration steps; 1 if more work remains
int dictRehashMilliseconds(dict *d, int ms);// rehash in batches of 100 steps for roughly `ms` milliseconds
void dictSetHashFunctionSeed(unsigned int initval);// set the hash seed
unsigned int dictGetHashFunctionSeed(void);// get the hash seed
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata);// cursor-based iteration over the dict

/* Hash table types */
extern dictType dictTypeHeapStringCopyKey;
extern dictType dictTypeHeapStrings;
extern dictType dictTypeHeapStringCopyKeyValue;

#endif /* __DICT_H */
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151

.h文件主要定义了字典所用到的结构体和方法,下面来看.c文件
/* Hash Tables Implementation.*/

#include "fmacros.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <limits.h>
#include <sys/time.h>
#include <ctype.h>

#include "dict.h"
#include "zmalloc.h"
#include "redisassert.h"
// Resizing can be switched on and off with dictEnableResize()/dictDisableResize().
// Even when dict_can_resize is 0 a table is still grown once used/size exceeds
// dict_force_resize_ratio (see _dictExpandIfNeeded).
static int dict_can_resize = 1;
static unsigned int dict_force_resize_ratio = 5;

/* -------------------------- private prototypes ---------------------------- */
// Grow the table when needed; called before a key's bucket index is computed
static int _dictExpandIfNeeded(dict *ht);
static unsigned long _dictNextPower(unsigned long size);
static int _dictKeyIndex(dict *ht, const void *key);
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);// initialize a dict

/* -------------------------- hash functions -------------------------------- */

/* Thomas Wang's 32 bit Mix Function: scrambles an integer key through a
 * fixed sequence of shift/add/xor rounds and returns the mixed value. */
unsigned int dictIntHashFunction(unsigned int key)
{
    unsigned int mixed = key;

    mixed = mixed + ~(mixed << 15);
    mixed = mixed ^ (mixed >> 10);
    mixed = mixed + (mixed << 3);
    mixed = mixed ^ (mixed >> 6);
    mixed = mixed + ~(mixed << 11);
    mixed = mixed ^ (mixed >> 16);
    return mixed;
}
/* Seed mixed into the string hash functions of this file. */
static uint32_t dict_hash_function_seed = 5381;

/* Set the hash seed. */
void dictSetHashFunctionSeed(uint32_t seed) {
    dict_hash_function_seed = seed;
}

/* Return the current hash seed. */
uint32_t dictGetHashFunctionSeed(void) {
    return dict_hash_function_seed;
}

/* Hash `len` bytes starting at `key` and return a 32 bit value.
 *
 * The input is consumed four bytes at a time; the remaining one to three
 * bytes are folded in afterwards, followed by a final avalanche step.
 * Each 4-byte word is loaded with memcpy() rather than through a casted
 * pointer, so the function is well defined for buffers of any alignment
 * and does not cast away the const qualifier of `key`. */
unsigned int dictGenHashFunction(const void *key, int len) {
    /* 'm' and 'r' are mixing constants generated offline.
     They're not really 'magic', they just happen to work well.  */
    uint32_t seed = dict_hash_function_seed;
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    /* Initialize the hash to a 'random' value */
    uint32_t h = seed ^ len;

    /* Mix 4 bytes at a time into the hash */
    const unsigned char *data = (const unsigned char *)key;

    while(len >= 4) {
        uint32_t k;

        memcpy(&k, data, sizeof(k)); /* native-endian load, alignment safe */

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    /* Handle the last few bytes of the input array  */
    switch(len) {
    case 3: h ^= data[2] << 16; /* fall through */
    case 2: h ^= data[1] << 8;  /* fall through */
    case 1: h ^= data[0]; h *= m; break;
    default: break;
    }

    /* Do a few final mixes of the hash to ensure the last few
     * bytes are well-incorporated. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return (unsigned int)h;
}

/* Case insensitive hash function (based on djb hash): starting from the
 * seed, every byte is lower-cased and folded in as hash = hash * 33 + c. */
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
    unsigned int hash = (unsigned int)dict_hash_function_seed;
    const unsigned char *cursor = buf;

    for (; len != 0; len--) {
        hash = hash * 33 + tolower(*cursor);
        cursor++;
    }
    return hash;
}
/* Put a hash table into the empty state: no bucket array, zero size, zero
 * mask and zero entries. The bucket array itself is not freed here. */
static void _dictReset(dictht *ht)
{
    ht->used = 0;
    ht->sizemask = 0;
    ht->size = 0;
    ht->table = NULL;
}

/* Allocate a dict and initialize it with the given type callbacks and
 * private data. Returns the new dict. */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *fresh = zmalloc(sizeof(dict));

    _dictInit(fresh, type, privDataPtr);
    return fresh;
}

/* Initialize an already allocated dict: both tables empty, no rehash in
 * progress, no running iterators. Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    d->type = type;
    d->privdata = privDataPtr;
    d->iterators = 0;
    d->rehashidx = -1;
    _dictReset(&d->ht[0]);
    _dictReset(&d->ht[1]);
    return DICT_OK;
}
/* Resize the table to the smallest size that still holds every entry
 * (never below DICT_HT_INITIAL_SIZE).
 *
 * Returns DICT_ERR when resizing is disabled or a rehash is already in
 * progress; otherwise returns whatever dictExpand() returns. */
int dictResize(dict *d)
{
    /* Same type as ht[0].used, so large element counts are not truncated. */
    unsigned long minimal;

    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
    minimal = d->ht[0].used;
    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;
    return dictExpand(d, minimal);
}

/* Expand or create the hash table.
 *
 * The requested size is rounded up by _dictNextPower(). Returns DICT_ERR if
 * a rehash is in progress, if `size` is smaller than the number of stored
 * entries, or if the rounded size equals the current table size. Otherwise
 * a zeroed bucket array is allocated: it becomes ht[0] when the dict has no
 * table yet, or ht[1] (with rehashidx set to 0) to start an incremental
 * rehash. */
int dictExpand(dict *d, unsigned long size)
{
    unsigned long buckets = _dictNextPower(size);
    dictht fresh; /* the new hash table */

    /* The size is invalid if it is smaller than the number of
     * elements already inside the hash table. */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (d->ht[0].size == buckets)
        return DICT_ERR;

    /* Allocate the new hash table with every bucket set to NULL. */
    fresh.table = zcalloc(buckets*sizeof(dictEntry*));
    fresh.size = buckets;
    fresh.sizemask = buckets-1;
    fresh.used = 0;

    if (d->ht[0].table != NULL) {
        /* Prepare a second hash table for incremental rehashing. */
        d->ht[1] = fresh;
        d->rehashidx = 0;
    } else {
        /* First initialization: not really a rehash, just install the
         * table so that it can accept keys. */
        d->ht[0] = fresh;
    }
    return DICT_OK;
}
/* Perform up to `n` steps of incremental rehashing, each step moving one
 * non-empty bucket from ht[0] to ht[1]. At most n*10 empty buckets are
 * skipped per call. Returns 1 if entries remain to be moved, 0 if no rehash
 * is in progress or the rehash has completed (ht[1] then replaces ht[0]). */
int dictRehash(dict *d, int n) {
    int empty_budget = n*10; /* Max number of empty buckets to visit. */
    dictht *src = &d->ht[0];
    dictht *dst = &d->ht[1];

    if (!dictIsRehashing(d)) return 0;

    for (; n != 0 && src->used != 0; n--) {
        dictEntry *node;

        /* rehashidx can't overflow: src->used != 0 guarantees that a
         * non-empty bucket still lies ahead. */
        assert(src->size > (unsigned long)d->rehashidx);
        while (src->table[d->rehashidx] == NULL) {
            d->rehashidx++;
            empty_budget--;
            if (empty_budget == 0) return 1;
        }

        /* Move every entry of this bucket to the head of its new chain. */
        node = src->table[d->rehashidx];
        while (node != NULL) {
            dictEntry *following = node->next;
            unsigned int slot = dictHashKey(d, node->key) & dst->sizemask;

            node->next = dst->table[slot];
            dst->table[slot] = node;
            src->used--;
            dst->used++;
            node = following;
        }
        src->table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* More to rehash... */
    if (src->used != 0) return 1;

    /* The whole table was rehashed: ht[1] becomes ht[0]. */
    zfree(src->table);
    *src = *dst;
    _dictReset(dst);
    d->rehashidx = -1;
    return 0;
}
/* Return the current time reported by gettimeofday(), in milliseconds. */
long long timeInMilliseconds(void) {
    struct timeval now;
    long long whole_ms, frac_ms;

    gettimeofday(&now,NULL);
    whole_ms = ((long long)now.tv_sec)*1000;
    frac_ms = now.tv_usec/1000;
    return whole_ms + frac_ms;
}
/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds.
 * Work is done in batches of 100 steps; returns the number of steps counted
 * (100 per batch that left more work to do). */
int dictRehashMilliseconds(dict *d, int ms) {
    const long long began = timeInMilliseconds();
    int moved = 0;

    for (;;) {
        if (!dictRehash(d,100)) break;
        moved += 100;
        if (timeInMilliseconds()-began > ms) break;
    }
    return moved;
}
/* Perform a single rehash step, but only when no iterator is registered
 * on the dict (d->iterators is zero). */
static void _dictRehashStep(dict *d) {
    if (d->iterators != 0) return;
    dictRehash(d,1);
}
/* Add a key/value pair to the dict. Returns DICT_ERR when dictAddRaw()
 * yields no entry (the key already exists), DICT_OK otherwise. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *slot = dictAddRaw(d,key);

    if (slot == NULL) return DICT_ERR;
    dictSetVal(d, slot, val);
    return DICT_OK;
}
/* Add an entry that carries only a key and return it so the caller can set
 * the value. Returns NULL when _dictKeyIndex() reports -1 (the key already
 * exists, or the table could not be expanded). */
dictEntry *dictAddRaw(dict *d, void *key)
{
    int bucket;
    dictEntry *node;
    dictht *target;

    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Bucket index for the new element, or -1 if it cannot be added. */
    bucket = _dictKeyIndex(d, key);
    if (bucket == -1)
        return NULL;

    /* While rehashing, new entries go to ht[1]; otherwise to ht[0]. */
    if (dictIsRehashing(d))
        target = &d->ht[1];
    else
        target = &d->ht[0];

    /* Insert at the head of the chain, on the assumption that recently
     * added entries are the ones accessed most frequently. */
    node = zmalloc(sizeof(dictEntry));
    node->next = target->table[bucket];
    target->table[bucket] = node;
    target->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, node, key);
    return node;
}
/* Add the pair, or overwrite the value when the key is already present.
 * Returns 1 if the key was added from scratch, 0 if an existing value was
 * replaced. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *existing, previous;

    /* Try a plain add first: it succeeds when the key does not exist. */
    if (dictAdd(d, key, val) == DICT_OK)
        return 1;

    /* The key already exists: fetch its entry. */
    existing = dictFind(d, key);

    /* Set the new value first and free the old one afterwards. The order
     * matters because the new value may be the very same object as the old
     * one: with reference counting you want to increment (set) and then
     * decrement (free), not the reverse. */
    previous = *existing;
    dictSetVal(d, existing, val);
    dictFreeVal(d, &previous);
    return 0;
}
/* Return the entry already stored for `key`; when there is none, add a new
 * key-only entry via dictAddRaw() and return that instead. */
dictEntry *dictReplaceRaw(dict *d, void *key) {
    dictEntry *found = dictFind(d,key);

    if (found != NULL) return found;
    return dictAddRaw(d,key);
}

/* Search and remove an element 查找并删除一个元素*/
static int dictGenericDelete(dict *d, const void *key, int nofree)
{
unsigned int h, idx;
dictEntry *he, *prevHe;
int table;

if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);//计算key的哈希值

for (table = 0; table <= 1; table++) {
idx = h & d->ht.sizemask;
he = d->ht
.table[idx]; prevHe = NULL; while(he) { if (key==he->key || dictCompareKeys(d, key, he->key)) { /* Unlink the element from the list */ if (prevHe) prevHe->next = he->next; else d->ht
.table[idx] = he->next; if (!nofree) { dictFreeKey(d, he); dictFreeVal(d, he); } zfree(he); d->ht
.used--; return DICT_OK; } prevHe = he; he = he->next; } if (!dictIsRehashing(d)) break; } return DICT_ERR; /* not found */ } //free删除 int dictDelete(dict *ht, const void *key) { return dictGenericDelete(ht,key,0); } //不free删除 int dictDeleteNoFree(dict *ht, const void *key) { return dictGenericDelete(ht,key,1); } /* Destroy an entire dictionary 清空哈希表*/ int _dictClear(dict *d, dictht *ht, void(callback)(void *)) { unsigned long i; /* Free all the elements */ for (i = 0; i < ht->size && ht->used > 0; i++) { dictEntry *he, *nextHe; //如果有回调 if (callback && (i & 65535) == 0) callback(d->privdata); if ((he = ht->table[i]) == NULL) continue; while(he) {//像链表一样释放每个节点 nextHe = he->next; dictFreeKey(d, he); dictFreeVal(d, he); zfree(he); ht->used--; he = nextHe; } } /* Free the table and the allocated cache structure */ zfree(ht->table); /* Re-initialize the table */ _dictReset(ht); return DICT_OK; /* never fails */ } //清空字典 void dictRelease(dict *d) { _dictClear(d,&d->ht[0],NULL); _dictClear(d,&d->ht[1],NULL); zfree(d); } //在字典中找key dictEntry *dictFind(dict *d, const void *key) { dictEntry *he; unsigned int h, idx, table; //如果新旧字典都为空,那么字典为空 if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */ if (dictIsRehashing(d)) _dictRehashStep(d);//如果正在rehash那么就查看进行状态 h = dictHashKey(d, key);//计算hash值 for (table = 0; table <= 1; table++) {//在新旧两个表中找 idx = h & d->ht
.sizemask; he = d->ht
.table[idx]; while(he) {//遍历该表上的key if (key==he->key || dictCompareKeys(d, key, he->key)) return he; he = he->next; } if (!dictIsRehashing(d)) return NULL; } return NULL; } //获取字典的值 void *dictFetchValue(dict *d, const void *key) { dictEntry *he; he = dictFind(d,key); return he ? dictGetVal(he) : NULL; } //根据字典属性计算指纹,说是防止不安全迭代器的使用 long long dictFingerprint(dict *d) { long long integers[6], hash = 0; int j; integers[0] = (long) d->ht[0].table; integers[1] = d->ht[0].size; integers[2] = d->ht[0].used; integers[3] = (long) d->ht[1].table; integers[4] = d->ht[1].size; integers[5] = d->ht[1].used; /* We hash N integers by summing every successive integer with the integer * hashing of the previous sum. Basically: * * Result = hash(hash(hash(int1)+int2)+int3) ... * * This way the same set of integers in a different order will (likely) hash * to a different number. */ for (j = 0; j < 6; j++) { hash += integers[j]; /* For the hashing step we use Tomas Wang's 64 bit integer hash. */ hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1; hash = hash ^ (hash >> 24); hash = (hash + (hash << 3)) + (hash << 8); // hash * 265 hash = hash ^ (hash >> 14); hash = (hash + (hash << 2)) + (hash << 4); // hash * 21 hash = hash ^ (hash >> 28); hash = hash + (hash << 31); } return hash; } //获取迭代器-普通 dictIterator *dictGetIterator(dict *d) { dictIterator *iter = zmalloc(sizeof(*iter)); iter->d = d; iter->table = 0; iter->index = -1; iter->safe = 0; iter->entry = NULL; iter->nextEntry = NULL; return iter; } //获取安全迭代器 dictIterator *dictGetSafeIterator(dict *d) { dictIterator *i = dictGetIterator(d); i->safe = 1; return i; } //迭代器指向下一个集合 dictEntry *dictNext(dictIterator *iter) { while (1) { if (iter->entry == NULL) { dictht *ht = &iter->d->ht[iter->table]; if (iter->index == -1 && iter->table == 0) {//index为-1说明还没有开始使用,设置迭代器数量加1或者计算当前状态指纹 if (iter->safe) iter->d->iterators++; else iter->fingerprint = dictFingerprint(iter->d); } iter->index++;//下标+1 if (iter->index >= (long) ht->size) 
{ if (dictIsRehashing(iter->d) && iter->table == 0) {//如果正在rehash且当前table是ht[0],那么访问ht[1]的第一个元素index为0 iter->table++; iter->index = 0; ht = &iter->d->ht[1]; } else { break; } } iter->entry = ht->table[iter->index]; } else { iter->entry = iter->nextEntry; } if (iter->entry) { /* We need to save the 'next' here, the iterator user * may delete the entry we are returning. */ iter->nextEntry = iter->entry->next; return iter->entry; } } return NULL; } //释放迭代器 void dictReleaseIterator(dictIterator *iter) { if (!(iter->index == -1 && iter->table == 0)) { if (iter->safe) iter->d->iterators--; else assert(iter->fingerprint == dictFingerprint(iter->d)); } zfree(iter); } //获取一个随机的key值 dictEntry *dictGetRandomKey(dict *d) { dictEntry *he, *orighe; unsigned int h; int listlen, listele; if (dictSize(d) == 0) return NULL; if (dictIsRehashing(d)) _dictRehashStep(d); if (dictIsRehashing(d)) { do {//如果正在rehash,那么就随机计算两个表中的一个元素 /* We are sure there are no elements in indexes from 0 * to rehashidx-1 */ h = d->rehashidx + (random() % (d->ht[0].size + d->ht[1].size - d->rehashidx)); he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] : d->ht[0].table[h]; } while(he == NULL);//随机的key不存在就继续 } else { do {//这里直接随机,为空继续 h = random() & d->ht[0].sizemask; he = d->ht[0].table[h]; } while(he == NULL); } /* Now we found a non empty bucket, but it is a linked * list and we need to get a random element from the list. * The only sane way to do so is counting the elements and * select a random index. */ listlen = 0; orighe = he; while(he) { he = he->next; listlen++; } listele = random() % listlen;//在这个表上再次随机找一个 he = orighe; while(listele--) he = he->next; return he; } //随机获取一些key unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) { unsigned long j; /* internal hash table id, 0 or 1. */ unsigned long tables; /* 1 or 2 tables? 
*/ unsigned long stored = 0, maxsizemask; unsigned long maxsteps; //判断想获取的数量是否大于字典大小 if (dictSize(d) < count) count = dictSize(d); maxsteps = count*10; /* Try to do a rehashing work proportional to 'count'. 判断不在rehash才继续 */ for (j = 0; j < count; j++) { if (dictIsRehashing(d)) _dictRehashStep(d); else break; } tables = dictIsRehashing(d) ? 2 : 1; maxsizemask = d->ht[0].sizemask; if (tables > 1 && maxsizemask < d->ht[1].sizemask) maxsizemask = d->ht[1].sizemask; /* Pick a random point inside the larger table. */ unsigned long i = random() & maxsizemask; unsigned long emptylen = 0; /* Continuous empty entries so far. */ while(stored < count && maxsteps--) { for (j = 0; j < tables; j++) { /* Invariant of the dict.c rehashing: up to the indexes already * visited in ht[0] during the rehashing, there are no populated * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */ if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) { /* Moreover, if we are currently out of range in the second * table, there will be no elements in both tables up to * the current rehashing index, so we jump if possible. * (this happens when going from big to small table). */ if (i >= d->ht[1].size) i = d->rehashidx; continue; } if (i >= d->ht[j].size) continue; /* Out of range for this table. */ dictEntry *he = d->ht[j].table[i]; /* Count contiguous empty buckets, and jump to other * locations if they reach 'count' (with a minimum of 5). */ if (he == NULL) { emptylen++; if (emptylen >= 5 && emptylen > count) { i = random() & maxsizemask; emptylen = 0; } } else { emptylen = 0; while (he) { /* Collect all the elements of the buckets found non * empty while iterating. 
*/ *des = he; des++; he = he->next; stored++; if (stored == count) return stored; } } } i = (i+1) & maxsizemask; } return stored; } //翻转位操作,屌 static unsigned long rev(unsigned long v) { unsigned long s = 8 * sizeof(v); // bit size; must be power of 2 unsigned long mask = ~0; while ((s >>= 1) > 0) { mask ^= (mask << s); v = ((v >> s) & mask) | ((v << s) & ~mask); } return v; } 字典扫描 unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata) { dictht *t0, *t1; const dictEntry *de; unsigned long m0, m1; if (dictSize(d) == 0) return 0; if (!dictIsRehashing(d)) { t0 = &(d->ht[0]);//t0指向表一 m0 = t0->sizemask; /* Emit entries at cursor */ de = t0->table[v & m0]; while (de) { fn(privdata, de);//遍历扫描ht[0] de = de->next; } } else { t0 = &d->ht[0]; t1 = &d->ht[1]; /* Make sure t0 is the smaller and t1 is the bigger table */ if (t0->size > t1->size) { t0 = &d->ht[1]; t1 = &d->ht[0]; } m0 = t0->sizemask; m1 = t1->sizemask; /* Emit entries at cursor */ de = t0->table[v & m0]; while (de) { fn(privdata, de); de = de->next; } /* Iterate over indices in larger table that are the expansion * of the index pointed to by the cursor in the smaller table */ do { /* Emit entries at cursor */ de = t1->table[v & m1]; while (de) { fn(privdata, de); de = de->next; } /* Increment bits not covered by the smaller mask */ v = (((v | m0) + 1) & ~m0) | (v & m0); /* Continue while bits covered by mask difference is non-zero */ } while (v & (m0 ^ m1)); } /* Set unmasked bits so incrementing the reversed cursor * operates on the masked bits of the smaller table */ v |= ~m0; /* Increment the reverse cursor */ v = rev(v); v++; v = rev(v); return v; } /* Expand the hash table if needed 根据需求扩大哈希表*/ static int _dictExpandIfNeeded(dict *d) { /* Incremental rehashing already in progress. Return. */ if (dictIsRehashing(d)) return DICT_OK; /* If the hash table is empty expand it to the initial size. 
*/ if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE); /* If we reached the 1:1 ratio, and we are allowed to resize the hash * table (global setting) or we should avoid it but the ratio between * elements/buckets is over the "safe" threshold, we resize doubling * the number of buckets. */ if (d->ht[0].used >= d->ht[0].size && (dict_can_resize || d->ht[0].used/d->ht[0].size > dict_force_resize_ratio)) { return dictExpand(d, d->ht[0].used*2); } return DICT_OK; } /* Our hash table capability is a power of two */ static unsigned long _dictNextPower(unsigned long size) { unsigned long i = DICT_HT_INITIAL_SIZE; if (size >= LONG_MAX) return LONG_MAX; while(1) { if (i >= size) return i; i *= 2;//2的次方成长 } } //获取key值对应的哈希表索引值,如果key已经存在则返回-1 static int _dictKeyIndex(dict *d, const void *key) { unsigned int h, idx, table; dictEntry *he; /* Expand the hash table if needed */ if (_dictExpandIfNeeded(d) == DICT_ERR) return -1; /* Compute the key hash value */ h = dictHashKey(d, key); for (table = 0; table <= 1; table++) { idx = h & d->ht
.sizemask; /* Search if this slot does not already contain the given key */ he = d->ht
.table[idx]; while(he) { if (key==he->key || dictCompareKeys(d, key, he->key)) return -1; he = he->next; } if (!dictIsRehashing(d)) break; } return idx; } //清空字典 void dictEmpty(dict *d, void(callback)(void*)) { _dictClear(d,&d->ht[0],callback); _dictClear(d,&d->ht[1],callback); d->rehashidx = -1; d->iterators = 0; } //字典支持改变大小 void dictEnableResize(void) { dict_can_resize = 1; } //字典不支持改变大小 void dictDisableResize(void) { dict_can_resize = 0; } //下面是字典的debug函数 #define DICT_STATS_VECTLEN 50 size_t _dictGetStatsHt(char *buf, size_t bufsize, dictht *ht, int tableid) { unsigned long i, slots = 0, chainlen, maxchainlen = 0; unsigned long totchainlen = 0; unsigned long clvector[DICT_STATS_VECTLEN]; size_t l = 0; if (ht->used == 0) { return snprintf(buf,bufsize, "No stats available for empty dictionaries\n"); } /* Compute stats. */ for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0; for (i = 0; i < ht->size; i++) { dictEntry *he; if (ht->table[i] == NULL) { clvector[0]++; continue; } slots++; /* For each hash entry on this slot... */ chainlen = 0; he = ht->table[i]; while(he) { chainlen++; he = he->next; } clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++; if (chainlen > maxchainlen) maxchainlen = chainlen; totchainlen += chainlen; } /* Generate human readable stats. */ l += snprintf(buf+l,bufsize-l, "Hash table %d stats (%s):\n" " table size: %ld\n" " number of elements: %ld\n" " different slots: %ld\n" " max chain length: %ld\n" " avg chain length (counted): %.02f\n" " avg chain length (computed): %.02f\n" " Chain length distribution:\n", tableid, (tableid == 0) ? 
"main hash table" : "rehashing target", ht->size, ht->used, slots, maxchainlen, (float)totchainlen/slots, (float)ht->used/slots); for (i = 0; i < DICT_STATS_VECTLEN-1; i++) { if (clvector[i] == 0) continue; if (l >= bufsize) break; l += snprintf(buf+l,bufsize-l, " %s%ld: %ld (%.02f%%)\n", (i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100); } /* Unlike snprintf(), teturn the number of characters actually written. */ if (bufsize) buf[bufsize-1] = '\0'; return strlen(buf); } void dictGetStats(char *buf, size_t bufsize, dict *d) { size_t l; char *orig_buf = buf; size_t orig_bufsize = bufsize; l = _dictGetStatsHt(buf,bufsize,&d->ht[0],0); buf += l; bufsize -= l; if (dictIsRehashing(d) && bufsize > 0) { _dictGetStatsHt(buf,bufsize,&d->ht[1],1); } /* Make sure there is a NULL term at the end. */ if (orig_bufsize) orig_buf[orig_bufsize-1] = '\0'; }
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐