您的位置:首页 > 数据库 > Redis

结合redis设计与实现的redis源码学习-2-SDS(简单动态字符串)

2017-09-24 12:05 1286 查看
上一次我们学习了redis的内存分配方式,今天我们来学习redis最基本的数据结构SDS,在redis的数据库里,包含字符产值的简直对在底层都是由SDS实现的。

SDS的基本数据结构是sdshdr结构体:

struct sdshdr{
int len; //记录数组中已经使用的字节的数量,等于所保存的字符串长度
int free; //记录buf数组中未使用字节的数量
char buf[]; //字节数组,用于保存字符串
};


使用这种结构体来存储字符串有以下好处:

1、可以直接获取字符串长度;

2、在使用时杜绝缓冲区溢出,因为free表示剩余可用空间;

3、二进制安全,字节数组不一定非要\0结尾,len记录使用长度;

4、可以使用部分string.h库中的函数。

这几个特性结合redis的空间预分配和惰性空间释放两种优化策略将速度提升至最大。

我发现SDS中的结构体与之前使用的有所差异,在3.2中为了更好的节省内存,使用多种结构体不同的类型来记录数据,并用一个字节的三个位来表示结构体类型,如果大量使用短小字符串的话,节省下来的内存也是比较可观的。

下面上代码:

/* SDSLib 2.0 -- A C dynamic strings library*/

#ifndef __S
4000
DS_H
#define __SDS_H

#define SDS_MAX_PREALLOC (1024*1024)

#include <sys/types.h>
#include <stdarg.h>
#include <stdint.h>

typedef char *sds;

/* Note: sdshdr5 is never used, we just access the flags byte directly.
* However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr8 {
uint8_t len; /* used */
uint8_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr16 {
uint16_t len; /* used */
uint16_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr32 {
uint32_t len; /* used */
uint32_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr64 {
uint64_t len; /* used */
uint64_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
//这里使用宏定义和inline来判断使用哪种类型的结构体,并返回对应结构体所需要的数据
#define SDS_TYPE_5  0
#define SDS_TYPE_8  1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
#define SDS_TYPE_MASK 7
#define SDS_TYPE_BITS 3
#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T))); //获得对应结构体的名称
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
//返回对应结构体的len数据
static inline size_t sdslen(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
return SDS_TYPE_5_LEN(flags);
case SDS_TYPE_8:
return SDS_HDR(8,s)->len;
case SDS_TYPE_16:
return SDS_HDR(16,s)->len;
case SDS_TYPE_32:
return SDS_HDR(32,s)->len;
case SDS_TYPE_64:
return SDS_HDR(64,s)->len;
}
return 0;
}
//返回对应结构体的可用大小
static inline size_t sdsavail(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5: {
return 0;
}
case SDS_TYPE_8: {
SDS_HDR_VAR(8,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_16: {
SDS_HDR_VAR(16,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_32: {
SDS_HDR_VAR(32,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_64: {
SDS_HDR_VAR(64,s);
return sh->alloc - sh->len;
}
}
return 0;
}
//设置sds使用的新长度
static inline void sdssetlen(sds s, size_t newlen) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
{
unsigned char *fp = ((unsigned char*)s)-1;
*fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
}
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->len = newlen;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->len = newlen;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->len = newlen;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->len = newlen;
break;
}
}
//设置sds增长了多少长度
static inline void sdsinclen(sds s, size_t inc) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
{
unsigned char *fp = ((unsigned char*)s)-1;
unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
*fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
}
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->len += inc;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->len += inc;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->len += inc;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->len += inc;
break;
}
}

/* sdsalloc() = sdsavail() + sdslen() */
static inline size_t sdsalloc(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
return SDS_TYPE_5_LEN(flags);
case SDS_TYPE_8:
return SDS_HDR(8,s)->alloc;
case SDS_TYPE_16:
return SDS_HDR(16,s)->alloc;
case SDS_TYPE_32:
return SDS_HDR(32,s)->alloc;
case SDS_TYPE_64:
return SDS_HDR(64,s)->alloc;
}
return 0;
}

static inline void sdssetalloc(sds s, size_t newlen) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
/* Nothing to do, this type has no total allocation info. */
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->alloc = newlen;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->alloc = newlen;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->alloc = newlen;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->alloc = newlen;
break;
}
}

sds sdsnewlen(const void *init, size_t initlen);
sds sdsnew(const char *init);
sds sdsempty(void);
sds sdsdup(const sds s);
void sdsfree(sds s);
sds sdsgrowzero(sds s, size_t len);
sds sdscatlen(sds s, const void *t, size_t len);
sds sdscat(sds s, const char *t);
sds sdscatsds(sds s, const sds t);
sds sdscpylen(sds s, const char *t, size_t len);
sds sdscpy(sds s, const char *t);

sds sdscatvprintf(sds s, const char *fmt, va_list ap);

sds sdscatfmt(sds s, char const *fmt, ...);
sds sdstrim(sds s, const char *cset);
void sdsrange(sds s, int start, int end);
void sdsupdatelen(sds s);
void sdsclear(sds s);
int sdscmp(const sds s1, const sds s2);
sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count);
void sdsfreesplitres(sds *tokens, int count);
void sdstolower(sds s);
void sdstoupper(sds s);
sds sdsfromlonglong(long long value);
sds sdscatrepr(sds s, const char *p, size_t len);
sds *sdssplitargs(const char *line, int *argc);
sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
sds sdsjoin(char **argv, int argc, char *sep);
sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);

/* Low level functions exposed to the user API */
sds sdsMakeRoomFor(sds s, size_t addlen);
void sdsIncrLen(sds s, int incr);
sds sdsRemoveFreeSpace(sds s);
size_t sdsAllocSize(sds s);
void *sdsAllocPtr(sds s);

/* Export the allocator used by SDS to the program using SDS.
* Sometimes the program SDS is linked to, may use a different set of
* allocators, but may want to allocate or free things that SDS will
* respectively fr
114bb
ee or allocate. */
void *sds_malloc(size_t size);
void *sds_realloc(void *ptr, size_t size);
void sds_free(void *ptr);
#endif


在.c文件中,引用了sdsalloc.h,其中定义了zmalloc的相关函数。

#include "zmalloc.h"
#define s_malloc zmalloc
#define s_realloc zrealloc
#define s_free zfree


c文件还挺大的,有1000多行,在此我选择了一些贴了上来,其中的内联函数使用方式同.h,用来选择使用的结构体类型:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include "sds.h"
#include "sdsalloc.h"
/* Create a new sds string with the content specified by the 'init' pointer * and 'initlen'.*/
sds sdsnewlen(const void *init, size_t initlen) {
void *sh;
sds s;
char type = sdsReqType(initlen);
//如果是一个空字符串的话会使用8的结构体
if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
int hdrlen = sdsHdrSize(type);
unsigned char *fp; /* flags pointer. */

sh = s_malloc(hdrlen+initlen+1);
if (!init)
memset(sh, 0, hdrlen+initlen+1);
if (sh == NULL) return NULL;
s = (char*)sh+hdrlen;
fp = ((unsigned char*)s)-1; //根据结构体在内存中的排列,-1位类型flag
switch(type) {
case SDS_TYPE_5: {
*fp = type | (initlen << SDS_TYPE_BITS);
break;
}
case SDS_TYPE_8: {
SDS_HDR_VAR(8,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
case SDS_TYPE_16: {
SDS_HDR_VAR(16,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
case SDS_TYPE_32: {
SDS_HDR_VAR(32,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
case SDS_TYPE_64: {
SDS_HDR_VAR(64,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
}
if (initlen && init)
memcpy(s, init, initlen);
s[initlen] = '\0';
return s;
}
/* Create an empty (zero length) sds string. Even in this case the string * always has an implicit null term. */
sds sdsempty(void) {
return sdsnewlen("",0);
}
void sdsfree(sds s) {
if (s == NULL) return;
s_free((char*)s-sdsHdrSize(s[-1])); //这里要减去flag为的长度和len还有alloc类型所占的长度才到分配内存的起点,sdsHdrSize是根据flag来计算头长度的内联函数,这里我将它省略了
}
//清空sds,这里只改变sds的长度,然后将sds第一位变为\0简化了操作
void sdsclear(sds s) {
sdssetlen(s, 0);
s[0] = '\0';
}
//给SDS增加长度
sds sdsMakeRoomFor(sds s, size_t addlen) {
void *sh, *newsh;
size_t avail = sdsavail(s); //获取可用大小
size_t len, newlen;
char type, oldtype = s[-1] & SDS_TYPE_MASK;
int hdrlen;

/* Return ASAP if there is enough space left. */
if (avail >= addlen) return s;

len = sdslen(s); //返回sds分配的长度
sh = (char*)s-sdsHdrSize(oldtype);
newlen = (len+addlen);
if (newlen < SDS_MAX_PREALLOC) //这里的分配原则是最终小于max则直接翻倍,大于则+上max大小
newlen *= 2;
else
newlen += SDS_MAX_PREALLOC;

type = sdsReqType(newlen);

/* Don't use type 5: the user is appending to the string and type 5 is * not able to remember empty space, so sdsMakeRoomFor() must be called* at every appending operation. */
if (type == SDS_TYPE_5) type = SDS_TYPE_8;

hdrlen = sdsHdrSize(type);
if (oldtype==type) {
newsh = s_realloc(sh, hdrlen+newlen+1);
if (newsh == NULL) return NULL;
s = (char*)newsh+hdrlen;
} else {
/* Since the header size changes, need to move the string forward,
* and can't use realloc */
newsh = s_malloc(hdrlen+newlen+1);
if (newsh == NULL) return NULL;
memcpy((char*)newsh+hdrlen, s, len+1);
s_free(sh);
s = (char*)newsh+hdrlen;
s[-1] = type;
sdssetlen(s, len);
}
sdssetalloc(s, newlen);
return s;
}
//释放SDS多余的空间
sds sdsRemoveFreeSpace(sds s) {
void *sh, *newsh;
char type, oldtype = s[-1] & SDS_TYPE_MASK;
int hdrlen;
size_t len = sdslen(s);
sh = (char*)s-sdsHdrSize(oldtype);

type = sdsReqType(len);
hdrlen = sdsHdrSize(type);
if (oldtype==type) {
newsh = s_realloc(sh, hdrlen+len+1); //我认为这里不需要操作,新的结构体等于老的结构体,那么就不会改变大小,这里可能是防止意外的在哪里改变吧
if (newsh == NULL) return NULL;
s = (char*)newsh+hdrlen;
} else {
newsh = s_malloc(hdrlen+len+1);//这里是创建一个新的SDS然后将数据拷贝过去
if (newsh == NULL) return NULL;
memcpy((char*)newsh+hdrlen, s, len+1);
s_free(sh);
s = (char*)newsh+hdrlen;
s[-1] = type;
sdssetlen(s, len);
}
sdssetalloc(s, len);
return s;
}
sds sdscatlen(sds s, const void *t, size_t len) {
size_t curlen = sdslen(s);
//这里总是先检查长度扩大大小,再进行拷贝,保证不会溢出
s = sdsMakeRoomFor(s,len);
if (s == NULL) return NULL;
memcpy(s+curlen, t, len);
sdssetlen(s, curlen+len);
s[curlen+len] = '\0';
return s;
}
//使用更为简单的函数作为中转,方便使用
sds sdscat(sds s, const char *t) {
return sdscatlen(s, t, strlen(t));
}
#define SDS_LLSTR_SIZE 21
//使用SDS存储LL类型的数据
int sdsll2str(char *s, long long value) {
char *p, aux;
unsigned long long v;
size_t l;

/* Generate the string representation, this method produces
* an reversed string. */
v = (value < 0) ? -value : value;
p = s;
do {
*p++ = '0'+(v%10); //计算每一10进制位的值
v /= 10;
} while(v);
if (value < 0) *p++ = '-';

/* Compute length and add null term. */
l = p-s;
*p = '\0';

/* Reverse the string. */
p--;
while(s < p) {
aux = *s;
*s = *p;
*p = aux;
s++;
p--;
}
return l;
}
//这个函数使用不定参数,可以追加多个字符串
/* Like sdscatprintf() but gets va_list instead of being variadic. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
va_list cpy;
char staticbuf[1024], *buf = staticbuf, *t;
size_t buflen = strlen(fmt)*2;

/* We try to start using a static buffer for speed.
* If not possible we revert to heap allocation. */
if (buflen > sizeof(staticbuf)) {
buf = s_malloc(buflen);
if (buf == NULL) return NULL;
} else {
buflen = sizeof(staticbuf);
}

/* Try with buffers two times bigger every time we fail to * fit the string in the current buffer size. */
while(1) {
buf[buflen-2] = '\0';
va_copy(cpy,ap);
vsnprintf(buf, buflen, fmt, cpy);
va_end(cpy);
if (buf[buflen-2] != '\0') { //这里只要不为/0就是溢出了,代表大小不够,只能使用堆空间,但是这里可能会多次释放开辟对空间
if (buf != staticbuf) s_free(buf);
buflen *= 2;
buf = s_malloc(buflen);
if (buf == NULL) return NULL;
continue;
}
break;
}

/* Finally concat the obtained string to the SDS string and return it. */
t = sdscat(s, buf);
if (buf != staticbuf) s_free(buf);
return t;
}
//这里将不定参数变为va_list
sds sdscatprintf(sds s, const char *fmt, ...) {
va_list ap;
char *t;
va_start(ap, fmt);
t = sdscatvprintf(s,fmt,ap);
va_end(ap);
return t;
}
//这个函数用来转换多种类型的数据,说这个比上面的快。。。。
/* This function is similar to sdscatprintf, but much faster as it does* not rely on sprintf() family functions implemented by the libc that* are often very slow. Moreover directly handling the sds string as* new data is concatenated provides a performance improvement. * However this function only handles an incompatible subset of printf-alike* format specifiers:
* %s - C String
* %S - SDS string
* %i - signed int
* %I - 64 bit signed integer (long long, int64_t)
* %u - unsigned int
* %U - 64 bit unsigned integer (unsigned long long, uint64_t)
* %% - Verbatim "%" character.
*/
//这里学到了,C中不定参数的类型存储表示格式还是用格式化数据的%来定义
sds sdscatfmt(sds s, char const *fmt, ...) {
size_t initlen = sdslen(s);
const char *f = fmt;
int i;
va_list ap;

va_start(ap,fmt);
f = fmt;    /* Next format specifier byte to process. */
i = initlen; /* Position of the next byte to write to dest str. */
while(*f) {
char next, *str;
size_t l;
long long num;
unsigned long long unum;
//这里的意思是总会至少扩大一个字节,感觉怪怪的,
/* Make sure there is always space for at least 1 char. */
if (sdsavail(s)==0) {
s = sdsMakeRoomFor(s,1);
}
//判断不定参数类型,并进行处理
switch(*f) {
case '%':
next = *(f+1);
f++;
switch(next) {
case 's':
case 'S':
str = va_arg(ap,char*);
l = (next == 's') ? strlen(str) : sdslen(str);
if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
}
memcpy(s+i,str,l);
sdsinclen(s,l);
i += l;
break;
case 'i':
case 'I':
if (next == 'i')
num = va_arg(ap,int);
else
num = va_arg(ap,long long);
{
char buf[SDS_LLSTR_SIZE];
l = sdsll2str(buf,num);
if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
}
memcpy(s+i,buf,l);
sdsinclen(s,l);
i += l;
}
break;
case 'u':
case 'U':
if (next == 'u')
unum = va_arg(ap,unsigned int);
else
unum = va_arg(ap,unsigned long long);
{
char buf[SDS_LLSTR_SIZE];
l = sdsull2str(buf,unum);
if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
}
memcpy(s+i,buf,l);
sdsinclen(s,l);
i += l;
}
break;
default: /* Handle %% and generally %<unknown>. */
s[i++] = next;
sdsinclen(s,1);
break;
}
break;
default:
s[i++] = *f;
sdsinclen(s,1);
break;
}
f++;
}
va_end(ap);

/* Add null-term */
s[i] = '\0';
return s;
}
//截断,找到前后第一次没有出现所需字符的地方
sds sdstrim(sds s, const char *cset) {
char *start, *end, *sp, *ep;
size_t len;

sp = start = s;
ep = end = s+sdslen(s)-1;
while(sp <= end && strchr(cset, *sp)) sp++;
while(ep > sp && strchr(cset, *ep)) ep--;
len = (sp > ep) ? 0 : ((ep-sp)+1);
if (s != sp) memmove(s, sp, len);
s[len] = '\0';
sdssetlen(s,len);
return s;
}
//选择起点和访问SDS的元素
void sdsrange(sds s, int start, int end) {
size_t newlen, len = sdslen(s);

if (len == 0) return;
if (start < 0) {
start = len+start;
if (start < 0) start = 0;
}
if (end < 0) {
end = len+end;
if (end < 0) end = 0;
}
newlen = (start > end) ? 0 : (end-start)+1;
if (newlen != 0) {
if (start >= (signed)len) {
newlen = 0;
} else if (end >= (signed)len) {
end = len-1;
newlen = (start > end) ? 0 : (end-start)+1;
}
} else {
start = 0;
}
if (start && newlen) memmove(s, s+start, newlen);
s[newlen] = 0;
sdssetlen(s,newlen);
}
//对比两个SDS,这里不是只返回是否相同,而且返回值告诉了我们是哪种情况
int sdscmp(const sds s1, const sds s2) {
size_t l1, l2, minlen;
int cmp;

l1 = sdslen(s1);
l2 = sdslen(s2);
minlen = (l1 < l2) ? l1 : l2;
cmp = memcmp(s1,s2,minlen);
if (cmp == 0) return l1-l2;
return cmp;
}
//这里将一个字符串分割成多个SDS,以数组的形式返回,以传出参数count返回长度
sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count) {
int elements = 0, slots = 5, start = 0, j;
sds *tokens;

if (seplen < 1 || len < 0) return NULL;

tokens = s_malloc(sizeof(sds)*slots);
if (tokens == NULL) return NULL;

if (len == 0) {
*count = 0;
return tokens;
}
for (j = 0; j < (len-(seplen-1)); j++) {
/* make sure there is room for the next element and the final one */
if (slots < elements+2) {
sds *newtokens;

slots *= 2;
newtokens = s_realloc(tokens,sizeof(sds)*slots);
if (newtokens == NULL) goto cleanup;
tokens = newtokens;
}
/* search the separator */
if ((seplen == 1 && *(s+j) == sep[0]) || (memcmp(s+j,sep,seplen) == 0)) {
tokens[elements] = sdsnewlen(s+start,j-start);
if (tokens[elements] == NULL) goto cleanup;
elements++;
start = j+seplen;
j = j+seplen-1; /* skip the separator */
}
}
/* Add the final element. We are sure there is room in the tokens array. */
tokens[elements] = sdsnewlen(s+start,len-start);
if (tokens[elements] == NULL) goto cleanup;
elements++;
*count = elements;
return tokens;

cleanup:
{
int i;
for (i = 0; i < elements; i++) sdsfree(tokens[i]);
s_free(tokens);
*count = 0;
return NULL;
}
}
/* Free the result returned by sdssplitlen(), or do nothing if 'tokens' is NULL. */
void sdsfreesplitres(sds *tokens, int count) {
if (!tokens) return;
while(count--)
sdsfree(tokens[count]);
s_free(tokens);
}
//按顺序替换掉字符,比如讲hello,中的ho替换为ll,长度为2。变成lelll。
sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen) {
size_t j, i, l = sdslen(s);

for (j = 0; j < l; j++) {
for (i = 0; i < setlen; i++) {
if (s[j] == from[i]) {
s[j] = to[i];
break;
}
}
}
return s;
}
void *sds_malloc(size_t size) { return s_malloc(size); }
void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }
void sds_free(void *ptr) { s_free(ptr); }


看完了3.2的SDS代码后,发现该版本和redis设计与实现上写的是有一些差异的,代码量变大了,但是考虑到了更多种的情况,在实际运行过程中的性能会有一些提升,最大的改变就是使用多种结构体来存储不同的SDS,用不同类型来存储长度。

真长啊,代码,还有很多,继续加油!
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: