您的位置：首页 > 数据库 > Redis

Redis源码分析（十三）——集合类型t_set

2014-10-24 11:04 585 查看

集合类型的编码方式有两种：REDIS_ENCODING_INTSET和REDIS_ENCODING_HT。第一个添加进集合的元素决定集合创建时的编码：如果第一个元素可以表示为long long类型的整数，则使用REDIS_ENCODING_INTSET，否则使用REDIS_ENCODING_HT。并且当出现以下任一情况时，intset编码的集合将切换为字典（REDIS_ENCODING_HT）编码方式：

**intset保存的整数值个数超过server.set_max_intset_entries(默认为512)

**试图往集合中添加一个新元素，且该元素不能被表示为long long类型。

注意：在字典编码方式时，字典中的键即为保存值的地方，而字典的值则都设为NULL。

集合的主要命令：

交集运算：SINTER和SINTERSTORE命令实现如下：

基本思想：首先将所有集合按元素个数从小到大排序，并以元素个数最少的集合作为遍历对象；对其中的每个元素，按集合元素个数从小到大的顺序依次在其他集合中查找该元素：如果其他集合“都”包含该元素，则说明该元素属于交集，应予保留；否则立即跳出，继续检查下一个元素。如此即可得到全部交集元素。本质上就是从最小的集合中剔除那些没有被其他所有集合共同包含的元素。

/* Generic implementation of the set intersection commands.
 *
 * c       - client that issued the command and receives the reply.
 * setkeys - array of 'setnum' key names of the sets to intersect.
 * setnum  - number of entries in 'setkeys'.
 * dstkey  - NULL for SINTER: the members of the intersection are sent to
 *           the client as a multi bulk reply.
 *           Non-NULL for SINTERSTORE: the intersection is stored at this
 *           key and its cardinality is sent to the client.
 *
 * If any source key does not exist the intersection is empty: SINTER
 * replies with an empty multi bulk, SINTERSTORE deletes dstkey (if present)
 * and replies with 0. If any source key holds a non-set value the function
 * returns right after checkType() reports the mismatch. */
void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum, robj *dstkey) {
    /* Array holding the set objects to intersect. */
    robj **sets = zmalloc(sizeof(robj*)*setnum);
    setTypeIterator *si;
    robj *eleobj, *dstset = NULL;
    int64_t intobj;
    void *replylen = NULL;
    unsigned long j, cardinality = 0;
    int encoding;

    for (j = 0; j < setnum; j++) {
        /* Every key in 'setkeys' is a source set. The write-flavoured lookup
         * is used when a destination key is given (SINTERSTORE), the
         * read-flavoured one otherwise (SINTER). */
        robj *setobj = dstkey ?
            lookupKeyWrite(c->db,setkeys[j]) :
            lookupKeyRead(c->db,setkeys[j]);

        /* A missing key makes the whole intersection empty: clean up and
         * reply immediately. */
        if (!setobj) {
            zfree(sets);
            if (dstkey) {
                if (dbDelete(c->db,dstkey)) {
                    signalModifiedKey(c->db,dstkey);
                    /* Emit the same "del" event that the empty-result path
                     * at the end of this function emits when the
                     * destination key is removed. */
                    notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
                        dstkey,c->db->id);
                    server.dirty++;
                }
                addReply(c,shared.czero);
            } else {
                addReply(c,shared.emptymultibulk);
            }
            return;
        }

        /* Wrong type: stop here (presumably checkType() has already sent
         * the error reply to the client -- confirm against its definition). */
        if (checkType(c,setobj,REDIS_SET)) {
            zfree(sets);
            return;
        }

        sets[j] = setobj;
    }

    /* Sort sets from the smallest to largest, this will improve our
     * algorithm's performance */
    qsort(sets,setnum,sizeof(robj*),qsortCompareSetsByCardinality);

    /* The first thing we should output is the total number of elements...
     * since this is a multi-bulk write, but at this stage we don't know
     * the intersection set size, so we use a trick, append an empty object
     * to the output list and save the pointer to later modify it with the
     * right length */
    if (!dstkey) {
        replylen = addDeferredMultiBulkLength(c);
    } else {
        /* If we have a target key where to store the resulting set
         * create this key with an empty set inside */
        dstset = createIntsetObject();
    }

    /* Iterate all the elements of the first (smallest) set, and test
     * the element against all the other sets, if at least one set does
     * not include the element it is discarded */
    si = setTypeInitIterator(sets[0]);
    while((encoding = setTypeNext(si,&eleobj,&intobj)) != -1) {
        for (j = 1; j < setnum; j++) {
            /* The same set object given more than once: the element is
             * trivially a member, nothing to check. */
            if (sets[j] == sets[0]) continue;

            if (encoding == REDIS_ENCODING_INTSET) {
                /* The current element is an integer coming from an intset. */

                /* intset with intset is simple... and fast */
                if (sets[j]->encoding == REDIS_ENCODING_INTSET &&
                    !intsetFind((intset*)sets[j]->ptr,intobj))
                {
                    break;
                /* in order to compare an integer with an object we
                 * have to use the generic function, creating an object
                 * for this */
                } else if (sets[j]->encoding == REDIS_ENCODING_HT) {
                    eleobj = createStringObjectFromLongLong(intobj);
                    if (!setTypeIsMember(sets[j],eleobj)) {
                        decrRefCount(eleobj);
                        break;
                    }
                    decrRefCount(eleobj);
                }
            } else if (encoding == REDIS_ENCODING_HT) {
                /* The current element is an object coming from a hash
                 * table encoded set. */

                /* Optimization... if the source object is integer
                 * encoded AND the target set is an intset, we can get
                 * a much faster path. */
                if (eleobj->encoding == REDIS_ENCODING_INT &&
                    sets[j]->encoding == REDIS_ENCODING_INTSET &&
                    !intsetFind((intset*)sets[j]->ptr,(long)eleobj->ptr))
                {
                    break;
                /* else... object to object check is easy as we use the
                 * type agnostic API here. */
                } else if (!setTypeIsMember(sets[j],eleobj)) {
                    break;
                }
            }
        }

        /* Only take action when all sets contain the member: the inner
         * loop ran to completion without hitting a 'break'. */
        if (j == setnum) {
            if (!dstkey) {
                /* SINTER: send the element straight to the client. */
                if (encoding == REDIS_ENCODING_HT)
                    addReplyBulk(c,eleobj);
                else
                    addReplyBulkLongLong(c,intobj);
                cardinality++;
            } else {
                /* SINTERSTORE: add the element to the result set. */
                if (encoding == REDIS_ENCODING_INTSET) {
                    eleobj = createStringObjectFromLongLong(intobj);
                    setTypeAdd(dstset,eleobj);
                    decrRefCount(eleobj);
                } else {
                    setTypeAdd(dstset,eleobj);
                }
            }
        }
    }
    setTypeReleaseIterator(si);

    if (dstkey) {
        /* Store the resulting set into the target, if the intersection
         * is not an empty set. Any previous value of dstkey is removed
         * first. */
        int deleted = dbDelete(c->db,dstkey);

        if (setTypeSize(dstset) > 0) {
            dbAdd(c->db,dstkey,dstset);
            addReplyLongLong(c,setTypeSize(dstset));
            notifyKeyspaceEvent(REDIS_NOTIFY_SET,"sinterstore",
                dstkey,c->db->id);
            signalModifiedKey(c->db,dstkey);
            server.dirty++;
        } else {
            decrRefCount(dstset);
            addReply(c,shared.czero);
            /* Report a modification only when the old destination key was
             * really removed: an empty result with no previous dstkey
             * leaves the keyspace untouched. */
            if (deleted) {
                notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
                    dstkey,c->db->id);
                signalModifiedKey(c->db,dstkey);
                server.dirty++;
            }
        }
    } else {
        /* SINTER: now that the cardinality is known, fill in the deferred
         * multi bulk length. */
        setDeferredMultiBulkLength(c,replylen,cardinality);
    }

    zfree(sets);
}

并集运算：SUNION和SUNIONSTORE
差集运算：SDIFF和SDIFFSTORE

实现如下：

/* Generic implementation of the set union and difference commands.
 *
 * c       - client that issued the command and receives the reply.
 * setkeys - array of 'setnum' key names of the source sets.
 * setnum  - number of entries in 'setkeys'.
 * dstkey  - NULL for SUNION / SDIFF: the resulting members are sent to the
 *           client as a multi bulk reply.
 *           Non-NULL for SUNIONSTORE / SDIFFSTORE: the result is stored at
 *           this key and its cardinality is sent to the client.
 * op      - REDIS_OP_UNION or REDIS_OP_DIFF.
 *
 * Non existing source keys are handled as empty sets. If any source key
 * holds a non-set value the function returns right after checkType()
 * reports the mismatch. For REDIS_OP_DIFF the result is the first set minus
 * all the following ones. */
void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op) {
    /* Array holding the source set objects (NULL for missing keys). */
    robj **sets = zmalloc(sizeof(robj*)*setnum);
    setTypeIterator *si;
    robj *ele, *dstset = NULL;
    int j, cardinality = 0;
    int diff_algo = 1;

    /* Fetch every source set and record it in the array. */
    for (j = 0; j < setnum; j++) {
        robj *setobj = dstkey ?
            lookupKeyWrite(c->db,setkeys[j]) :
            lookupKeyRead(c->db,setkeys[j]);

        /* A missing key is recorded as NULL and later skipped. */
        if (!setobj) {
            sets[j] = NULL;
            continue;
        }

        /* Wrong type: stop here (presumably checkType() has already sent
         * the error reply to the client -- confirm against its definition). */
        if (checkType(c,setobj,REDIS_SET)) {
            zfree(sets);
            return;
        }

        sets[j] = setobj;
    }

    /* Select what DIFF algorithm to use.
     *
     * Algorithm 1 is O(N*M) where N is the size of the element first set
     * and M the total number of sets.
     *
     * Algorithm 2 is O(N) where N is the total number of elements in all
     * the sets.
     *
     * We compute what is the best bet with the current input here. */
    if (op == REDIS_OP_DIFF && sets[0]) {
        long long algo_one_work = 0, algo_two_work = 0;

        for (j = 0; j < setnum; j++) {
            if (sets[j] == NULL) continue;

            /* Algorithm 1 cost: size of the first set once per existing
             * set. */
            algo_one_work += setTypeSize(sets[0]);
            /* Algorithm 2 cost: sum of the sizes of all existing sets. */
            algo_two_work += setTypeSize(sets[j]);
        }

        /* Algorithm 1 has better constant times and performs less operations
         * if there are elements in common. Give it some advantage. */
        algo_one_work /= 2;
        diff_algo = (algo_one_work <= algo_two_work) ? 1 : 2;

        if (diff_algo == 1 && setnum > 1) {
            /* With algorithm 1 it is better to order the sets to subtract
             * by decreasing size, so that we are more likely to find
             * duplicated elements ASAP. Only sets[1..] are reordered;
             * sets[0] must stay first. */
            qsort(sets+1,setnum-1,sizeof(robj*),
                qsortCompareSetsByRevCardinality);
        }
    }

    /* We need a temp set object to store our union. If the dstkey
     * is not NULL (that is, we are inside an SUNIONSTORE operation) then
     * this set object will be the resulting object to set into the target key */
    dstset = createIntsetObject();

    if (op == REDIS_OP_UNION) {
        /* Union is trivial, just add every element of every set to the
         * temporary set. */
        for (j = 0; j < setnum; j++) {
            if (!sets[j]) continue; /* non existing keys are like empty sets */

            si = setTypeInitIterator(sets[j]);
            while((ele = setTypeNextObject(si)) != NULL) {
                /* Count the element only when setTypeAdd() reports that it
                 * was actually added (it was not already in the result). */
                if (setTypeAdd(dstset,ele)) cardinality++;
                decrRefCount(ele);
            }
            setTypeReleaseIterator(si);
        }
    } else if (op == REDIS_OP_DIFF && sets[0] && diff_algo == 1) {
        /* DIFF Algorithm 1:
         *
         * We perform the diff by iterating all the elements of the first set,
         * and only adding it to the target set if the element does not exist
         * into all the other sets.
         *
         * This way we perform at max N*M operations, where N is the size of
         * the first set, and M the number of sets. */
        si = setTypeInitIterator(sets[0]);
        while((ele = setTypeNextObject(si)) != NULL) {
            /* Look for the element in every other set; a 'break' means it
             * must be excluded from the result. */
            for (j = 1; j < setnum; j++) {
                if (!sets[j]) continue; /* no key is an empty set. */
                if (sets[j] == sets[0]) break; /* same set! */
                if (setTypeIsMember(sets[j],ele)) break;
            }

            if (j == setnum) {
                /* There is no other set with this element. Add it. */
                setTypeAdd(dstset,ele);
                cardinality++;
            }

            decrRefCount(ele);
        }
        setTypeReleaseIterator(si);
    } else if (op == REDIS_OP_DIFF && sets[0] && diff_algo == 2) {
        /* DIFF Algorithm 2:
         *
         * Add all the elements of the first set to the auxiliary set.
         * Then remove all the elements of all the next sets from it.
         *
         * This is O(N) where N is the sum of all the elements in every set. */
        for (j = 0; j < setnum; j++) {
            if (!sets[j]) continue; /* non existing keys are like empty sets */

            si = setTypeInitIterator(sets[j]);
            while((ele = setTypeNextObject(si)) != NULL) {
                if (j == 0) {
                    /* First set: seed the result with its elements. */
                    if (setTypeAdd(dstset,ele)) cardinality++;
                } else {
                    /* Following sets: remove their elements from the
                     * result. */
                    if (setTypeRemove(dstset,ele)) cardinality--;
                }
                decrRefCount(ele);
            }
            setTypeReleaseIterator(si);

            /* Exit if result set is empty as any additional removal
             * of elements will have no effect. */
            if (cardinality == 0) break;
        }
    }

    /* Output the content of the resulting set, if not in STORE mode */
    if (!dstkey) {
        addReplyMultiBulkLen(c,cardinality);

        si = setTypeInitIterator(dstset);
        while((ele = setTypeNextObject(si)) != NULL) {
            addReplyBulk(c,ele);
            decrRefCount(ele);
        }
        setTypeReleaseIterator(si);

        /* The temporary result set is no longer needed. */
        decrRefCount(dstset);
    } else {
        /* If we have a target key where to store the resulting set
         * create this key with the result set inside. Any previous value
         * of dstkey is removed first. */
        int deleted = dbDelete(c->db,dstkey);

        if (setTypeSize(dstset) > 0) {
            dbAdd(c->db,dstkey,dstset);
            addReplyLongLong(c,setTypeSize(dstset));
            notifyKeyspaceEvent(REDIS_NOTIFY_SET,
                op == REDIS_OP_UNION ? "sunionstore" : "sdiffstore",
                dstkey,c->db->id);
            signalModifiedKey(c->db,dstkey);
            server.dirty++;
        } else {
            decrRefCount(dstset);
            addReply(c,shared.czero);
            /* Report a modification only when the old destination key was
             * really removed: an empty result with no previous dstkey
             * leaves the keyspace untouched. */
            if (deleted) {
                notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
                    dstkey,c->db->id);
                signalModifiedKey(c->db,dstkey);
                server.dirty++;
            }
        }
    }

    zfree(sets);
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航