您的位置:首页 > 理论基础 > 数据结构算法

redis原理-数据结构

2016-02-17 09:30 465 查看
redis原理-数据结构
一、  内存分配

redis内存分配函数是在文件zmalloc.h和zmalloc.c里面进行声明和定义的,主要的函数如下:

/* Allocation wrappers declared in zmalloc.h. */
void *zmalloc(size_t size);             /* allocate `size` bytes */

void *zrealloc(void *ptr, size_t size); /* resize a previous allocation */

void zfree(void *ptr);                  /* release an allocation */

redis使用了zmalloc zrealloc zfree来封装了内存管理的函数,这里针对不同的平台来封装,从而屏蔽了底层的差异性实现跨平台,定义如下:

/*
 * Allocator selection.
 *
 * Picks the backing allocator at compile time and, where the allocator can
 * report the size of a live block, defines HAVE_MALLOC_SIZE together with a
 * zmalloc_size(p) macro that forwards to the allocator's own query function.
 * ZMALLOC_LIB is a human-readable name for the allocator in use and falls
 * back to "libc" when no branch defined it.
 */
#if defined(USE_TCMALLOC) /* built against Google's tcmalloc */
#define ZMALLOC_LIB ("tcmalloc-" __xstr(TC_VERSION_MAJOR) "." __xstr(TC_VERSION_MINOR))
#include <google/tcmalloc.h>
#if (TC_VERSION_MAJOR == 1 && TC_VERSION_MINOR >= 6) || (TC_VERSION_MAJOR > 1)
#define HAVE_MALLOC_SIZE 1
#define zmalloc_size(p) tc_malloc_size(p)
#else
#error "Newer version of tcmalloc required"
#endif

#elif defined(USE_JEMALLOC) /* built against jemalloc */
#define ZMALLOC_LIB ("jemalloc-" __xstr(JEMALLOC_VERSION_MAJOR) "." __xstr(JEMALLOC_VERSION_MINOR) "." __xstr(JEMALLOC_VERSION_BUGFIX))
#include <jemalloc/jemalloc.h>
#if (JEMALLOC_VERSION_MAJOR == 2 && JEMALLOC_VERSION_MINOR >= 1) || (JEMALLOC_VERSION_MAJOR > 2)
#define HAVE_MALLOC_SIZE 1
#define zmalloc_size(p) je_malloc_usable_size(p)
#else
#error "Newer version of jemalloc required"
#endif

#elif defined(__APPLE__) /* the macOS system allocator exposes malloc_size() */
#include <malloc/malloc.h>
#define HAVE_MALLOC_SIZE 1
#define zmalloc_size(p) malloc_size(p)
#endif

/* No branch above named an allocator library: the plain C library is in use. */
#ifndef ZMALLOC_LIB
#define ZMALLOC_LIB "libc"
#endif

定义平台之间的差异,主要是tcmalloc(google)、jemalloc(facebook)、苹果平台。

上边说过,封装就是为了屏蔽底层平台的差异,同时方便自己实现相关的统计函数。具体来说就是:

若系统中存在Google的TC_MALLOC库,则使用tc_malloc一族函数代替原本的malloc一族函数。

若当前系统是Mac系统,则使用<malloc/malloc.h>中的内存分配函数。

其他情况,在每一段分配好的空间前头,同时多分配一个定长的字段,用来记录分配的空间大小。

/*
 * PREFIX_SIZE: number of bytes reserved for a size header.
 *
 * It is 0 when HAVE_MALLOC_SIZE is defined. Otherwise it is
 * sizeof(long long) on Sun/SPARC targets and sizeof(size_t) elsewhere.
 * The surrounding article says this header sits in front of each block and
 * records its size; that usage is not visible in this snippet.
 */
#ifdef HAVE_MALLOC_SIZE
#define PREFIX_SIZE (0)
#else
#if defined(__sun) || defined(__sparc) || defined(__sparc__)
#define PREFIX_SIZE (sizeof(long long))
#else
#define PREFIX_SIZE (sizeof(size_t))
#endif
#endif

如果是sun下就使用 sizeof(long long),如果是linux就使用sizeof(size_t)了。

 

二、  简单字符串

适用场景:

        redis默认字符串都是使用sds。

 

优点:

获取字符串长度:复杂度O(1),直接读取sdshdr->len就能得到长度;而常规C字符串需要遍历到'\0',复杂度是O(N)。

二进制安全存储:使用长度来规定了数据的存放长度,而c语言字符数组遇到\0就认为是结尾了,使得redis不仅仅可以存放字符串还可以存放任意二进制数据。

杜绝缓冲区溢出:sds存放的数据长度都是指定的,不存在溢出。

修改字符串减少内存重分配次数:sdscat调用的sdsMakeRoomFor在剩余空间不足时会预分配空间:若追加后的新长度(len+addlen)小于1MB,则分配新长度2倍的空间;否则在新长度的基础上多分配1MB的空间。

    惰性释放:在sdstrim(sdsclear)中,不会立即归还被删除字符所占的内存,而是把这部分长度累加到sdshdr->free里,留待以后复用。

 

原理解释:

简单动态字符串(simple dynamic string,SDS),定义在sds.h头文件里面,包含了sds的定义以及sds相关的基础操作函数。

sds定义:

/* Header stored immediately before the character data of every sds string. */
struct sdshdr {

    int len;    /* number of bytes of buf currently in use */

    int free;   /* number of allocated but unused bytes remaining in buf */

    char buf[]; /* flexible array member holding the string bytes */

};

这里最后一个成员buf[]没有指定长度,这其实是一种叫做柔性数组(flexible array member)的技巧:

 

redis使用sds和常规的字符串好处有:

sds定义的API有:

sdsnew :创建一个给定c字符串的sds

/* Build an sds from a NUL-terminated C string.
 * A NULL `init` is treated as a zero-length string. */
sds sdsnew(const char *init) {
    size_t initlen = 0;

    if (init != NULL)
        initlen = strlen(init);

    return sdsnewlen(init, initlen);
}

sds sdsnewlen(const void *init, size_tinitlen) {

   struct sdshdr *sh;

   if (init) {

       sh = zmalloc(sizeof(struct sdshdr)+initlen+1);  //内容 头部+数据

    }else {

       sh = zcalloc(sizeof(struct sdshdr)+initlen+1);

   }   

   if (sh == NULL) return NULL;

   sh->len = initlen;

   sh->free = 0;

   if (initlen && init)

       memcpy(sh->buf, init, initlen);

   sh->buf[initlen] = '\0';

   return (char*)sh->buf;

}

sdslen :返回sds已经使用的空间字节数

这种通过sds可以直接获取到sds的头部

/* Return the number of bytes in use by `s`.
 * The header sits directly in front of the buffer, so it is located by
 * stepping back sizeof(struct sdshdr) bytes from the sds pointer. */
static inline size_t sdslen(const sds s) {
    const size_t hdrlen = sizeof(struct sdshdr);
    struct sdshdr *hdr = (void*)(s - hdrlen);

    return hdr->len;
}

sdscat:追加一个字符串到sds尾部

/* Make sure at least `addlen` more bytes can be appended to `s`.
 *
 * If the spare capacity already suffices, `s` is returned untouched.
 * Otherwise the block is reallocated: the required length (len + addlen) is
 * doubled while it is below SDS_MAX_PREALLOC, and grown by SDS_MAX_PREALLOC
 * once it is at or above that limit. Only the `free` field is updated; the
 * used length is left as it was.
 *
 * Returns the (possibly moved) sds, or NULL if zrealloc returned NULL. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    struct sdshdr *oldhdr, *grown;
    size_t spare = sdsavail(s);
    size_t used, capacity;

    if (spare >= addlen)
        return s;

    used = sdslen(s);
    oldhdr = (void*) (s-(sizeof(struct sdshdr)));

    capacity = used + addlen;
    capacity = (capacity < SDS_MAX_PREALLOC) ? capacity * 2
                                             : capacity + SDS_MAX_PREALLOC;

    grown = zrealloc(oldhdr, sizeof(struct sdshdr)+capacity+1);
    if (grown == NULL)
        return NULL;

    grown->free = capacity - used;
    return grown->buf;
}

/* Append `len` bytes starting at `t` to the sds `s`.
 *
 * Room is obtained through sdsMakeRoomFor(), which may over-allocate so that
 * later appends need fewer reallocations. The header is then updated: `len`
 * grows by the appended amount and `free` shrinks by the same amount.
 *
 * Returns the (possibly moved) sds, or NULL if growing the buffer failed.
 * The caller must use the returned pointer in place of `s`. */
sds sdscatlen(sds s, const void *t, size_t len) {
    struct sdshdr *sh;
    size_t curlen = sdslen(s);

    s = sdsMakeRoomFor(s,len);  /* pre-allocates to reduce reallocations */
    if (s == NULL) return NULL;

    sh = (void*) (s-(sizeof(struct sdshdr)));
    memcpy(s+curlen, t, len);
    sh->len = curlen+len;
    sh->free = sh->free-len;    /* the appended bytes consume spare capacity */
    s[curlen+len] = '\0';
    return s;
}

 

/* Append the NUL-terminated C string `t` to `s`; see sdscatlen(). */
sds sdscat(sds s, const char *t) {
    size_t tlen = strlen(t);

    return sdscatlen(s, t, tlen);
}

sdstrim:清除sds首尾的字符(可指定多个)

/* Strip from both ends of `s` every leading/trailing character that occurs
 * in `cset`.
 *
 * The surviving bytes are moved to the front of the buffer and the buffer is
 * not shrunk: the bytes removed are added to the header's `free` count.
 * Returns `s` itself. */
sds sdstrim(sds s, const char *cset) {
    struct sdshdr *hdr = (void*) (s-(sizeof(struct sdshdr)));
    char *first, *last, *head, *tail;
    size_t kept;

    first = s;
    last = s+sdslen(s)-1;
    head = first;
    tail = last;

    /* Advance past leading characters found in cset. */
    while (head <= last && strchr(cset, *head))
        head++;
    /* Retreat past trailing characters found in cset. */
    while (tail > first && strchr(cset, *tail))
        tail--;

    if (head > tail)
        kept = 0;
    else
        kept = (tail-head)+1;

    if (hdr->buf != head)
        memmove(hdr->buf, head, kept);
    hdr->buf[kept] = '\0';
    hdr->free += (hdr->len-kept);   /* trimmed bytes become spare capacity */
    hdr->len = kept;
    return s;
}

 

 

三、  双端链表

适用场景:

    链表在redis应用比较广泛,列表键底层实现之一就是链表;客户端信息;

    采用双端链表可以定位到头部或者尾部,每个节点的数据类型为任意数据类型,通过定义复制、释放、比较的函数指针实现了可以操作任意用户自定义数据。

原理解释:

链表节点:

/* One node of the doubly linked list. */
typedef struct listNode {

   struct listNode *prev; /* predecessor */

   struct listNode *next; /* successor */

   void *value; /* pointer to the stored value */

} listNode;

 

链表定义:

/* Doubly linked list with optional per-value callbacks. */
typedef struct list {

   listNode *head; /* first node */

   listNode *tail; /* last node */

   /* callback that duplicates a node value */

   void *(*dup)(void *ptr);

   /* callback that releases a node value */

   void (*free)(void *ptr);

   /* callback that compares a node value against a key */

   int (*match)(void *ptr, void *key);

   /* number of nodes in the list */

   unsigned long len;

 

} list;

/* Prototypes */

/* Presumably allocates an empty list; definition not shown here. */
list *listCreate(void);

/* Presumably frees the list and its nodes; definition not shown here. */
void listRelease(list *list);

/* Inserts a new node holding `value` at the head of `list`. */
list *listAddNodeHead(list *list, void*value);

/* Insert a new node carrying `value` at the head of `list`.
 *
 * Returns `list` on success. If the node cannot be allocated, NULL is
 * returned and the list is left unchanged. */
list *listAddNodeHead(list *list, void *value)
{
    listNode *fresh = zmalloc(sizeof(*fresh));

    if (fresh == NULL)
        return NULL;

    fresh->value = value;
    fresh->prev = NULL;              /* a head node has no predecessor */

    if (list->len != 0) {
        /* Link in front of the current head. */
        fresh->next = list->head;
        list->head->prev = fresh;
        list->head = fresh;
    } else {
        /* Empty list: the node is both head and tail. */
        fresh->next = NULL;
        list->head = list->tail = fresh;
    }

    list->len += 1;
    return list;
}

 

 

四、  字典

适用场景:

    在redis中字典的应用很广泛,因为redis是一个键值(k-v)内存数据库,所以存储都是以字典作为基准的;字典也是hash键的底层实现之一:当保存的元素是比较长的字符串或者hash键值对比较多的时候,就会使用字典来存储。

原理解释:

    由于字典是由hash表存储的,hash表的定义如下:

    typedefstruct dictht {

   dictEntry **table;  // 哈希表数组

   unsigned long size; // 哈希表大小

   unsigned long sizemask; // 哈希表大小掩码,用于计算索引值,等于 size -1

   unsigned long used; // 已经使用的

}dictht;

 

hash表节点

/* One key/value entry of a hash table bucket. */
typedef struct dictEntry {
    void *key;
    union { /* the value: a pointer or a 64-bit integer sharing the same storage */
        void *val;
        uint64_t u64;
        int64_t s64;
    } v;
    struct dictEntry *next;  /* next entry in the same bucket (collision chain) */
} dictEntry;

 

字典类型:

/* Per-dictionary callbacks that let a dict hold arbitrary key/value types. */
typedef struct dictType {
    /* hash a key */
    unsigned int (*hashFunction)(const void *key);
    /* duplicate a key */
    void *(*keyDup)(void *privdata, const void *key);
    /* duplicate a value */
    void *(*valDup)(void *privdata, const void *obj);
    /* compare two keys */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    /* destroy a key */
    void (*keyDestructor)(void *privdata, void *key);
    /* destroy a value */
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

 

字典的定义:

typedefstruct dict {

    dictType *type; //可以实现任意用户类型,只需要设定了特定的类型操作函数

    void *privdata;

    dictht ht[2];

    int rehashidx; /* rehashing not in progressif rehashidx == -1 */

    int iterators; /* number of iteratorscurrently running */

}dict;

 

//其中rehash是对hash表进行增大或者减小。以扩展为例:当hash表的负载因子(负载因子=dictht.used/dictht.size)达到1(used >= size)且允许resize(dict_can_resize),或者负载因子超过dict_force_resize_ratio(默认5)时,就会触发扩展。

 

创建一个字典

dict *dictCreate(dictType *type, void *privDataPtr);

/* Allocate a dictionary and initialise it through _dictInit() with the given
 * callback table and private data pointer. Returns the new dictionary. */
dict *dictCreate(dictType *type, void *privDataPtr)
{
    dict *created = zmalloc(sizeof(*created));

    _dictInit(created, type, privDataPtr);
    return created;
}

/* Initialise an already allocated dictionary.
 *
 * Both hash tables are reset to the empty state, the callback table and the
 * private data pointer are stored, rehashing is marked as not in progress
 * (rehashidx == -1) and the iterator count is cleared.
 * Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    _dictReset(&d->ht[0]);
    _dictReset(&d->ht[1]);
    d->type = type;
    d->privdata = privDataPtr; /* private data for the callbacks */
    d->rehashidx = -1;         /* -1: no rehash in progress */
    d->iterators = 0;
    return DICT_OK;
}

/* Put a hash table into the empty state. The bucket array pointer is only
 * set to NULL: nothing is allocated here and nothing is freed. */
static void _dictReset(dictht *ht)
{
    ht->table = NULL;  /* no bucket array yet */
    ht->size = 0;
    ht->sizemask = 0;
    ht->used = 0;
}

 

/* Create the hash table, or start a rehash into a bigger one.
 *
 * `size` is rounded up by _dictNextPower() and a zeroed bucket array of that
 * size is allocated. If the dictionary has no table yet, the new table
 * becomes ht[0]. Otherwise it becomes ht[1] and rehashidx is set to 0 so
 * that incremental rehashing can start.
 *
 * Returns DICT_ERR if a rehash is already in progress or if `size` is
 * smaller than the number of entries already stored; DICT_OK otherwise. */
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    unsigned long realsize = _dictNextPower(size);

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    /* rehashidx == 0 marks the rehash as started; later calls can step it. */
    d->rehashidx = 0;
    return DICT_OK;
}

/* Return the table size to use for a requested `size`.
 *
 * Starting from DICT_HT_INITIAL_SIZE the candidate is doubled until it is
 * greater than or equal to `size`, so the result is never smaller than
 * DICT_HT_INITIAL_SIZE. Requests of LONG_MAX or more return LONG_MAX. */
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long i = DICT_HT_INITIAL_SIZE;

    if (size >= LONG_MAX) return LONG_MAX;
    while(1) {
        if (i >= size)
            return i;
        i *= 2;
    }
}

 

 

/* Add a key/value pair to the dictionary.
 *
 * The entry is created by dictAddRaw(); if that returns NULL (it does so
 * when the key already exists), DICT_ERR is returned and nothing is stored.
 * Otherwise the value is set on the new entry and DICT_OK is returned. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *entry = dictAddRaw(d,key);

    if (!entry) return DICT_ERR;    /* no entry was created */
    dictSetVal(d, entry, val);
    return DICT_OK;
}

/* Create an entry for `key` and link it into the dictionary without setting
 * its value.
 *
 * A single rehash step is attempted first when a rehash is in progress.
 * Returns NULL when _dictKeyIndex() reports -1 (the key is already present,
 * or the table could not be expanded). Otherwise the new entry is pushed at
 * the head of its bucket, in ht[1] while rehashing and in ht[0] otherwise,
 * and returned. */
dictEntry *dictAddRaw(dict *d, void *key)
{
    int slot;
    dictEntry *node;
    dictht *target;

    if (dictIsRehashing(d))
        _dictRehashStep(d);

    /* -1 means there is no slot to insert into. */
    slot = _dictKeyIndex(d, key);
    if (slot == -1)
        return NULL;

    /* New keys go to the rehash target while a rehash is running. */
    if (dictIsRehashing(d))
        target = &d->ht[1];
    else
        target = &d->ht[0];

    node = zmalloc(sizeof(*node));
    node->next = target->table[slot];   /* insert at the head of the chain */
    target->table[slot] = node;
    target->used++;

    dictSetKey(d, node, key);
    return node;
}

 

/* Perform one step of incremental rehashing, but only while no iterator is
 * registered on the dictionary (iterators == 0). */
static void _dictRehashStep(dict *d) {
    if (d->iterators == 0) dictRehash(d,1);
}

 

/* Perform up to `n` steps of incremental rehashing.
 *
 * Each step moves every entry of one non-empty ht[0] bucket into ht[1],
 * recomputing the bucket index with ht[1]'s mask. When ht[0] has no entries
 * left, its bucket array is freed, ht[1] takes its place, ht[1] is reset and
 * rehashidx returns to -1.
 *
 * Returns 0 when no rehash is in progress or when the rehash completed
 * during this call; returns 1 after `n` steps otherwise. */
int dictRehash(dict *d, int n) {
    /* Only meaningful while a rehash is in progress. */
    if (!dictIsRehashing(d)) return 0;

    while(n--) {
        dictEntry *de, *nextde;

        /* Check if we already rehashed the whole table... */
        if (d->ht[0].used == 0) {
            zfree(d->ht[0].table);
            d->ht[0] = d->ht[1];    /* the new table becomes the main one */
            _dictReset(&d->ht[1]);
            d->rehashidx = -1;
            return 0;
        }

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned)d->rehashidx);

        /* Skip empty buckets. */
        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;

        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            unsigned int h;

            nextde = de->next;  /* remember the rest of the chain */

            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;

            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;

            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }

        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    return 1;
}

 

 

/* Return the bucket index at which `key` can be inserted.
 *
 * The table is expanded first if _dictExpandIfNeeded() decides so. The
 * bucket selected by the key's hash is then scanned in ht[0] and, only while
 * a rehash is in progress, in ht[1] as well.
 *
 * Returns -1 if the expansion failed or if an equal key is already stored.
 * Otherwise returns the index computed for the last table examined, which is
 * ht[1] during a rehash and ht[0] otherwise. */
static int _dictKeyIndex(dict *d, const void *key)
{
    unsigned int h, idx, table;
    dictEntry *he;

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;
    /* Compute the key hash value */
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        /* Search if this slot does not already contain the given key */
        he = d->ht[table].table[idx];
        while(he) {
            if (dictCompareKeys(d, key, he->key))
                return -1;
            he = he->next;
        }
        /* The key is not in this table. Only look at ht[1] as well when a
         * rehash is in progress. */
        if (!dictIsRehashing(d)) break;
    }

    return idx;
}

 

/* Hash `key` with the dictionary's type-specific hash function. */
#define dictHashKey(d, key) ((d)->type->hashFunction(key))

/* Store a key in `entry`: through the type's keyDup callback when one is
 * set, otherwise the pointer itself is stored. */
#define dictSetKey(d, entry, _key_) do { \
    if ((d)->type->keyDup) \
        (entry)->key = (d)->type->keyDup((d)->privdata, _key_); \
    else \
        (entry)->key = (_key_); \
} while(0)

 

 

/* Store a value in `entry`: through the type's valDup callback when one is
 * set, otherwise the pointer itself is stored. */
#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
        (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \
    else \
        (entry)->v.val = (_val_); \
} while(0)

 

 

rehash:

/* Expand the hash table when it is empty or has become too dense.
 *
 * - While a rehash is in progress nothing is done (DICT_OK).
 * - An empty table is created with DICT_HT_INITIAL_SIZE buckets.
 * - Otherwise, once used >= size, the table is expanded to twice the number
 *   of stored entries if resizing is allowed (dict_can_resize) or if the
 *   used/size ratio is above dict_force_resize_ratio.
 *
 * Returns the result of dictExpand() when an expansion is attempted,
 * DICT_OK otherwise. */
static int _dictExpandIfNeeded(dict *d)
{
    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    if (d->ht[0].used >= d->ht[0].size &&
        (dict_can_resize ||
         d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
    {
        return dictExpand(d, d->ht[0].used*2);
    }

    return DICT_OK;
}

 

 

/* MurmurHash2 over `len` bytes starting at `key`.
 *
 * A non-cryptographic hash, seeded with dict_hash_function_seed. The input
 * is consumed four bytes at a time; the remaining 1-3 bytes are folded in by
 * the switch below, whose cases fall through on purpose.
 *
 * NOTE(review): the 4-byte read goes through a uint32_t pointer cast, so the
 * result depends on host byte order and the read assumes the platform
 * tolerates it at `key`'s alignment. */
unsigned int dictGenHashFunction(const void *key, int len) {
    /* 'm' and 'r' are mixing constants generated offline.
     They're not really 'magic', they just happen to work well.  */
    uint32_t seed = dict_hash_function_seed;
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    /* Initialize the hash to a 'random' value */
    uint32_t h = seed ^ len;

    /* Mix 4 bytes at a time into the hash */
    const unsigned char *data = (const unsigned char *)key;

    while(len >= 4) {
        uint32_t k = *(const uint32_t*)data;

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    /* Handle the last few bytes of the input array  */
    switch(len) {
    case 3: h ^= data[2] << 16; /* fallthrough */
    case 2: h ^= data[1] << 8;  /* fallthrough */
    case 1: h ^= data[0]; h *= m;
    }

    /* Do a few final mixes of the hash to ensure the last few
     * bytes are well-incorporated. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return (unsigned int)h;
}

 

五、  跳跃表

适用场景:

redis在内部只有2个地方用到了跳跃表,第一个是有序集合键(键较多;或者值元素比较长);第二个是集群节点中。

原理解释:

跳跃表节点定义:

typedefstruct zskiplistNode {

    robj *obj; //redis对象

    double score; //分值,跳跃表中分值是按照从小到大排列的

    struct zskiplistNode *backward; //后退指针

    struct zskiplistLevel {

        struct zskiplistNode *forward;//前进指针

        unsigned int span; //跨度,记录2个节点之间的距离,越大距离就越远

    } level[];//层数组

}zskiplistNode;

 

 

跳跃表定义:

/* The skip list itself. */
typedef struct zskiplist {
    struct zskiplistNode *header, *tail;
    unsigned long length; /* number of nodes, not counting the header node */
    int level;            /* level count of the tallest node in the list */
} zskiplist;

 

  //创建一个跳跃表

/* Create an empty skip list.
 *
 * The list starts at level 1 with zero nodes. The header node is created
 * with ZSKIPLIST_MAXLEVEL levels, each with a NULL forward pointer and a
 * span of 0; the header's backward pointer and the list tail are NULL. */
zskiplist *zslCreate(void) {
    zskiplist *zsl = zmalloc(sizeof(*zsl));
    int lvl;

    zsl->level = 1;
    zsl->length = 0;

    zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
    for (lvl = 0; lvl < ZSKIPLIST_MAXLEVEL; lvl++) {
        zsl->header->level[lvl].forward = NULL;
        zsl->header->level[lvl].span = 0;   /* 0: not linked to any node */
    }
    zsl->header->backward = NULL;

    zsl->tail = NULL;
    return zsl;
}

/* Allocate a skip list node with room for `level` levels and store `score`
 * and `obj` in it. The level array and the backward pointer are left
 * uninitialised; the caller fills them in. */
zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
    zskiplistNode *zn = zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel));

    zn->score = score;
    zn->obj = obj;
    return zn;
}

 

 

六、  整数集合

适用场景:

整数集合是集合键的底层实现之一:当集合里面只包含整数,并且数量不多的时候,redis就用整数集合作为集合键的底层实现。

原理解释:

    

整数集合定义:

/* Integer set: a sorted array of integers that all share one width. */
typedef struct intset {
    uint32_t encoding;  /* element encoding */
    uint32_t length;    /* number of elements */
    int8_t contents[];  /* elements in ascending order, no duplicates; the real
                         * element type is selected by `encoding` */
} intset;

 

/* Create an empty intset.
 *
 * A new set has no elements and starts with the INTSET_ENC_INT16 encoding.
 * intsetResize() multiplies the element count by the encoding value, so
 * INTSET_ENC_INT16/32/64 are presumably the element widths in bytes
 * (sizeof(int16_t), sizeof(int32_t), sizeof(int64_t)); confirm against the
 * definitions. */
intset *intsetNew(void) {
    intset *fresh = zmalloc(sizeof(intset));

    fresh->length = 0;
    fresh->encoding = intrev32ifbe(INTSET_ENC_INT16);
    return fresh;
}

/* Insert `value` into the intset.
 *
 * If `value` needs a wider encoding than the set currently uses, the set is
 * upgraded and the value added by intsetUpgradeAndAdd(). Otherwise the
 * insert position is looked up; an existing value leaves the set unchanged.
 *
 * `success`, when not NULL, is set to 1 if the value was added and to 0 if
 * it was already present. Returns the (possibly reallocated) intset; the
 * caller must use the returned pointer. */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    uint8_t valenc = _intsetValueEncoding(value);   /* encoding `value` needs */
    uint32_t pos;
    if (success) *success = 1;

    /* Upgrade encoding if necessary. If we need to upgrade, we know that
     * this value should be either appended (if > 0) or prepended (if < 0),
     * because it lies outside the range of existing values. */
    if (valenc > intrev32ifbe(is->encoding)) {
        /* This always succeeds, so we don't need to curry *success. */
        return intsetUpgradeAndAdd(is,value);
    } else {
        /* Abort if the value is already present in the set.
         * This call will populate "pos" with the right position to insert
         * the value when it cannot be found. */
        if (intsetSearch(is,value,&pos)) {
            if (success) *success = 0;
            return is;
        }

        /* Make room for one more element. */
        is = intsetResize(is,intrev32ifbe(is->length)+1);
        /* Inserting before the end: shift the tail one slot to the right. */
        if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);
    }

    _intsetSet(is,pos,value);
    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    return is;
}

 //获取数据的编码格式

/* Return the required encoding for theprovided value.

static uint8_t _intsetValueEncoding(int64_tv) {

   if (v < INT32_MIN || v > INT32_MAX)

       return INTSET_ENC_INT64;

   else if (v < INT16_MIN || v > INT16_MAX)

       return INTSET_ENC_INT32;

   else

       return INTSET_ENC_INT16;

}

/* Upgrade the intset to the encoding required by `value`, then add `value`.
 *
 * The new value is stored at index 0 when it is negative and after the last
 * existing element otherwise. Returns the reallocated intset. */
static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
    uint8_t oldenc = intrev32ifbe(is->encoding);
    uint8_t widerenc = _intsetValueEncoding(value);
    int count = intrev32ifbe(is->length);
    int shift = (value < 0) ? 1 : 0;    /* 1: leave slot 0 free for the new value */
    int i;

    /* First set new encoding and resize */
    is->encoding = intrev32ifbe(widerenc);
    is = intsetResize(is,intrev32ifbe(is->length)+1);

    /* Re-encode from the last element down to the first, so a widened
     * element never overwrites one that has not been read yet. */
    for (i = count - 1; i >= 0; i--)
        _intsetSet(is,i+shift,_intsetGetEncoded(is,i,oldenc));

    /* Set the value at the beginning or the end. */
    if (shift)
        _intsetSet(is,0,value);
    else
        _intsetSet(is,intrev32ifbe(is->length),value);

    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    return is;
}

 

/* Reallocate the intset so that it can hold `len` elements of the current
 * encoding. Only the allocation size changes; the header and element bytes
 * are not modified here. Returns the reallocated intset. */
static intset *intsetResize(intset *is, uint32_t len) {
    uint32_t payload = len*intrev32ifbe(is->encoding);

    return zrealloc(is,sizeof(intset)+payload);
}

 

/* Return the element at index `pos`, reading the contents array as if it
 * were encoded with `enc`. The bytes are copied out with memcpy and passed
 * through the matching memrevNNifbe helper before being returned. */
static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
    if (enc == INTSET_ENC_INT64) {
        int64_t wide;

        memcpy(&wide,((int64_t*)is->contents)+pos,sizeof(wide));
        memrev64ifbe(&wide);
        return wide;
    }

    if (enc == INTSET_ENC_INT32) {
        int32_t mid;

        memcpy(&mid,((int32_t*)is->contents)+pos,sizeof(mid));
        memrev32ifbe(&mid);
        return mid;
    }

    {
        int16_t narrow;

        memcpy(&narrow,((int16_t*)is->contents)+pos,sizeof(narrow));
        memrev16ifbe(&narrow);
        return narrow;
    }
}

 

/* Store `value` at index `pos` using the set's current encoding, then pass
 * the stored element through the matching memrevNNifbe helper. */
static void _intsetSet(intset *is, int pos, int64_t value) {
    uint32_t enc = intrev32ifbe(is->encoding);

    if (enc == INTSET_ENC_INT64) {
        int64_t *slots = (int64_t*)is->contents;

        slots[pos] = value;
        memrev64ifbe(slots+pos);
    } else if (enc == INTSET_ENC_INT32) {
        int32_t *slots = (int32_t*)is->contents;

        slots[pos] = value;
        memrev32ifbe(slots+pos);
    } else {
        int16_t *slots = (int16_t*)is->contents;

        slots[pos] = value;
        memrev16ifbe(slots+pos);
    }
}

 

/* Search for `value` in the intset.
 *
 * Returns 1 when the value is found and stores its index in *pos.
 * Returns 0 when it is absent and stores in *pos the index at which the
 * value can be inserted. `pos` may be NULL, in which case nothing is
 * stored. */
static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
    int lo = 0, hi = intrev32ifbe(is->length)-1, mid = -1;
    int64_t cur = -1;

    /* The value can never be found when the set is empty */
    if (intrev32ifbe(is->length) == 0) {
        if (pos) *pos = 0;
        return 0;
    }

    /* Check for the case where we know we cannot find the value,
     * but do know the insert position. */
    if (value > _intsetGet(is,intrev32ifbe(is->length)-1)) {
        /* Larger than the last element: insert at the end. */
        if (pos) *pos = intrev32ifbe(is->length);
        return 0;
    }
    if (value < _intsetGet(is,0)) {
        /* Smaller than the first element: insert at the front. */
        if (pos) *pos = 0;
        return 0;
    }

    /* Binary search over the sorted contents. */
    while (hi >= lo) {
        mid = ((unsigned int)lo + (unsigned int)hi) >> 1;
        cur = _intsetGet(is,mid);
        if (value > cur) {
            lo = mid+1;
        } else if (value < cur) {
            hi = mid-1;
        } else {
            if (pos) *pos = mid;
            return 1;
        }
    }

    /* Not found: `lo` is the insert position. */
    if (pos) *pos = lo;
    return 0;
}

 

/* Move the elements from index `from` up to the end of the set so that they
 * start at index `to`. The element count is length - from, scaled to bytes
 * by the width of the current encoding; memmove is used, so the two ranges
 * may overlap. */
static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
    void *source, *target;
    uint32_t nbytes = intrev32ifbe(is->length)-from;
    uint32_t enc = intrev32ifbe(is->encoding);

    if (enc == INTSET_ENC_INT64) {
        int64_t *base = (int64_t*)is->contents;

        source = base+from;
        target = base+to;
        nbytes *= sizeof(int64_t);
    } else if (enc == INTSET_ENC_INT32) {
        int32_t *base = (int32_t*)is->contents;

        source = base+from;
        target = base+to;
        nbytes *= sizeof(int32_t);
    } else {
        int16_t *base = (int16_t*)is->contents;

        source = base+from;
        target = base+to;
        nbytes *= sizeof(int16_t);
    }
    memmove(target,source,nbytes);
}

 

七、  压缩列表

适用场景:

压缩列表是列表键和hash键的底层实现之一:当一个列表键只包含较小的整数或者长度比较短的字符串时,就使用压缩列表。

原理解释:

//ziplist是一个特殊编码的双端链表,保存了字符串和整形,整数保存的是实际整数而不是字符串。

   /*The ziplist is a specially encoded dually linked list that is designed

 * tobe very memory efficient. It stores both strings and integer values,

 *where integers are encoded as actual integers instead of a series of

 *characters. It allows push and pop operations on either side of the list

 * inO(1) time. However, because every operation requires a reallocation of

 *the memory used by the ziplist, the actual complexity is related to the

 *amount of memory used by the ziplist.

 *

 *----------------------------------------------------------------------------

 *

 *ZIPLIST OVERALL LAYOUT:

 *The general layout of the ziplist is as follows://存储模型如下

 *<zlbytes><zltail><zllen><entry><entry><zlend>

 *    4字节  4字节  2字节                1字节

 *<zlbytes> is an unsigned integer to hold the number of bytes that the//标示存储的字节数,通过这个值可以对ziplist进行调整而不用遍历所有获取大小

 *ziplist occupies. This value needs to be stored to be able to resize the

 *entire structure without the need to traverse it first.

 *

 *<zltail> is the offset to the last entry in the list. This allows a pop//保存了链表的尾部偏移

 *operation on the far side of the list without the need for full traversal.

 *

 *<zllen> is the number of entries. When this value is larger than 2**16-2,//保存节点的数目;当该值大于2**16-2时,

 * weneed to traverse the entire list to know how many items it holds.

 *

 *<zlend> is a single byte special value, equal to 255, which indicates the//标示ziplist的尾部,值是255

 *end of the list.

 

//每个entry保存了2部分内容:1、前置节点的长度;2、当前节点的编码类型和长度

* Every entry in the ziplist is prefixed bya header that contains two pieces

 * ofinformation. First, the length of the previous entry is stored to be

 *able to traverse the list from back to front. Second, the encoding with an

 *optional string length of the entry itself is stored.

entry:[ previous_length][encoding][content]

 

//如果前置长度小于254个字节,只用1个字节保存值;如果大于或者等于254个字节,它将占5个字节,第一个字节设置为254,其他4个字节存储长度。

* The length of the previous entry isencoded in the following way:

 * Ifthis length is smaller than 254 bytes, it will only consume a single

 *byte that takes the length as value. When the length is greater than or

 *equal to 254, it will consume 5 bytes. The first byte is set to 254 to

 *indicate a larger value is following. The remaining 4 bytes take the

 *length of the previous entry as value.

 

//其他的字段是需要根据内容来存放的,如果存放的是一个字符串,那么头2个位存放编码字符串所使用的类型,接下来的位是存放字符串的长度,如果头2个位都是1那么接下来2个标示存储整形的类型

* The other header field of the entryitself depends on the contents of the

 *entry. When the entry is a string, the first 2 bits of this header will hold

 *the type of encoding used to store the length of the string, followed by the

 *actual length of the string. When the entry is an integer the first 2 bits

 *are both set to 1. The following 2 bits are used to specify what kind of

 *integer will be stored after this header. An overview of the different

 *types and encodings is as follows:

 

* |00pppppp| - 1 byte    //00 字符串长度小于或等于63字节

 *     String value with length less than or equal to 63 bytes (6 bits).

 *|01pppppp|qqqqqqqq| - 2 bytes  //01 字符串长度小于16383字节

 *     String value with length less than or equal to 16383 bytes (14 bits).

 *|10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes //10字符串大于或者等于16384的字符串

 *     String value with length greater than or equal to 16384 bytes.

 *|11000000| - 1 byte                                 //00标示 2个字节(int16_t)

 *     Integer encoded as int16_t (2 bytes).

 *|11010000| - 1 byte

 *     Integer encoded as int32_t (4 bytes).            //01标示 4个字节(int32_t)

 *|11100000| - 1 byte

 *      Integer encoded as int64_t (8 bytes).             //10标示 8个字节(int64_t)

 *|11110000| - 1 byte

 *     Integer encoded as 24 bit signed (3 bytes).      //11 标示存放的是3个字节的有符号整形

 *|11111110| - 1 byte

 *     Integer encoded as 8 bit signed (1 byte).      //11111 表中存放的是1个字节有符号整形

 *|1111xxxx| - (with xxxx between 0000 and 1101) immediate 4 bit integer.//0000-1101标示4位长0-12的无符号整数,没有right值了

 *     Unsigned integer from 0 to 12. The encoded value is actually from

 *      1to 13 because 0000 and 1111 can not be used, so 1 should be

 *     subtracted from the encoded 4 bit value to obtain the right value.

 *|11111111| - End of ziplist.      //标示ziplist结尾

 *

 *All the integers are represented in little endian byte order.

 
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  redis