一 内存分配管理机制

memcached是一个高性能的,分布式内存对象缓存系统,用于在动态系统中减少数据库负载,提升性能。memcached有一个很有特色的内存管理方式,为了提高效率,默认情况下采用了名为Slab Allocator的机制分配管理内存空间。

memcached文档中关于slab allocator有这么一段话:

The primary goal of the slabs subsystem in memcached was to eliminate memory fragmentation issues totally by using fixed-size memory chunks coming from a few predetermined size classes.



二 源码分析

1 关键数据结构

<span style="font-family:Microsoft YaHei;font-size:12px;">/* When adding a setting, be sure to update process_stat_settings */
 * Globally accessible settings as derived from the commandline.
struct settings {
    //最大内存, 默认64M,最大2G。通过-m 设定
    size_t maxbytes;
    //最大连接数,默认1024 通过-c设定
    int maxconns;
    //tcp 端口号,通过-p 设置
    int port;
    //ucp 端口号,通过-U 设置
    int udpport;
    //监听IP或SOCKET地址 ,通过-l设定
    char *inter;
    int verbose;
    rel_time_t oldest_live; /* ignore existing items older than this */
    int evict_to_free;
    char *socketpath;   /* path to unix socket if using local socket */
    int access;  /* access mask (a la chmod) for unix domain socket */
    //slab分配增量因子,默认围1.25, 可通过-f设定
    double factor;          /* chunk size growth factor */
    //给一个key+value+flags 分配的最小字节数。 默认值为48. 可通过-n修改。
    int chunk_size;
    //工作线程数。默认围4, 可通过-t设定
    int num_threads;        /* number of worker (without dispatcher) libevent threads to run */
    char prefix_delimiter;  /* character that marks a key prefix (for stats) */
    int detail_enabled;     /* nonzero if we're collecting detailed stats */
    int reqs_per_event;     /* Maximum number of io to process on each  io-event. */
    bool use_cas;
    //使用协议, 试过-B参数设定。 可能值为:ascii, binary, or auto, 版本: 1.4.0+
    enum protocol binding_protocol;
    int backlog;
     //单个item最大字计数。默认1M。可通过-I参数修改。在1.4.2版本之后,这个值可以大于1M,必须小于128M。但memcached会抛出警告,大于1M将导致整体运行内存的增加和内存性能的降低。 版本: 1.4.2+
    int item_size_max;        /* Maximum item size, and upper end for slabs */
    bool sasl;              /* SASL on/off */

<span style="font-family:Microsoft YaHei;font-size:12px;">typedef struct _stritem {
    struct _stritem *next;
    struct _stritem *prev;
    struct _stritem *h_next;    /* hash chain next */
    rel_time_t      time;       /* least recent access */
    rel_time_t      exptime;    /* expire time */
    int             nbytes;     /* size of data */
    unsigned short  refcount;
    uint8_t         nsuffix;    /* length of flags-and-length string */
    uint8_t         it_flags;   /* ITEM_* above */
    uint8_t         slabs_clsid;/* which slab class we're in */
    uint8_t         nkey;       /* key length, w/terminating null and padding */
    /* this odd type prevents type-punning issues when we do
     * the little shuffle to save space when not using CAS. */
    union {
        uint64_t cas;
        char end;
    } data[];
    /* if it_flags & ITEM_CAS we have 8 bytes CAS */
    /* then null-terminated key */
    /* then " flags length\r\n" (no terminating null) */
    /* then data with terminating \r\n (no terminating null; it's binary!) */
} item;

/**
 * Bookkeeping for one slab class, i.e. one chunk size.
 *
 * slabs_init() fills in size and perslab for every class; the remaining
 * fields track free slots and the slab pages allocated for the class.
 */
typedef struct {
    unsigned int size;      /* sizes of items */
    unsigned int perslab;   /* how many items per slab */

    void **slots;           /* list of item ptrs */
    unsigned int sl_total;  /* size of previous array */
    unsigned int sl_curr;   /* first free slot */

    void *end_page_ptr;         /* pointer to next free item at end of page, or 0 */
    unsigned int end_page_free; /* number of items remaining at end of last alloced page */

    unsigned int slabs;     /* how many slabs were allocated for this class */

    void **slab_list;       /* array of slab pointers */
    unsigned int list_size; /* size of prev array */

    unsigned int killing;  /* index+1 of dying slab, or zero if none */
    size_t requested; /* The number of requested bytes */
} slabclass_t;

/* Index of the first slab class. slabs_init() and slabs_clsid() start counting
 * here, which leaves 0 free to serve as the "no class / error" return value. */
#define POWER_SMALLEST 1
/* Upper bound on the slab class index used by the sizing loop in slabs_init(). */
#define POWER_LARGEST  200

2 分配算法的实现

<span style="font-family:Microsoft YaHei;font-size:12px;">int main()
    while (-1 != (c = getopt(argc, argv,…)
    // settings.factor is initialized to 1.25 and can be overridden with the -f command-line option
    slabs_init(settings.maxbytes, settings.factor, preallocate);

<span style="font-family:Microsoft YaHei;font-size:12px;">static void settings_init(void) {
    settings.use_cas = true;
    settings.access = 0700;
    settings.port = 11211;
    settings.udpport = 11211;
    /* By default this string should be NULL for getaddrinfo() */
    settings.inter = NULL;
    settings.maxbytes = 64 * 1024 * 1024; /* default is 64MB */
    settings.maxconns = 1024;         /* to limit connections-related memory to about 5MB */
    settings.verbose = 0;
    settings.oldest_live = 0;
    settings.evict_to_free = 1;       /* push old items out of cache when memory runs out */
    settings.socketpath = NULL;       /* by default, not using a unix socket */
    settings.factor = 1.25;
    settings.chunk_size = 48;         /* space for a modest key and value */
    settings.num_threads = 4;         /* N workers */
    settings.num_threads_per_udp = 0;
    settings.prefix_delimiter = ':';
    settings.detail_enabled = 0;
    settings.reqs_per_event = 20;
    settings.backlog = 1024;
    settings.binding_protocol = negotiating_prot;
    settings.item_size_max = 1024 * 1024; /* The famous 1MB upper limit. */


从settings的初始化函数settings_init可以看出:settings.item_size_max = 1024 * 1024,即每个slab页的大小(同时也是单个item的上限)默认为1MB;settings.factor = 1.25,即相邻slabclass之间chunk大小的默认增长因子为1.25。通过命令行参数对settings进行定制后,main函数调用slabs_init,根据settings中的配置来初始化slabclass数组。slabs_init函数在slabs.c文件中实现:
<span style="font-family:Microsoft YaHei;font-size:12px;">// Slab manager initialization: builds the table of slab classes (chunk sizes).
// limit is the memory limit (64 MB by default, from settings_init); prealloc
// defaults to false and is enabled with the -L command-line option (original
// author's note).
// NOTE(review): this excerpt appears to have lost several closing braces, the
// matching #endif and the end of the function; the statements are reproduced
// exactly as found.
void slabs_init(const size_t limit, const double factor, const bool prealloc) {
    int i = POWER_SMALLEST - 1;	// POWER_SMALLEST is defined as 1, so i starts at 0
    // item (struct _stritem): structure used for storing items within memcached
    unsigned int size = sizeof(item) + settings.chunk_size;// chunk_size defaults to 48
    mem_limit = limit;  // limit defaults to 64 MB
    if (prealloc) {
        /* Allocate everything in a big chunk with malloc */
        mem_base = malloc(mem_limit);
        if (mem_base != NULL) {
            mem_current = mem_base;
            mem_avail = mem_limit;
        } else {
            fprintf(stderr, "Warning: Failed to allocate requested memory in"
                    " one large chunk.\nWill allocate in smaller chunks\n");
    // Declared elsewhere as: static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];
    // POWER_LARGEST is defined as 200
    memset(slabclass, 0, sizeof(slabclass));
    // settings.item_size_max: maximum item size, and upper end for slabs; defaults to 1 MB
    // Keep adding classes while the index stays below POWER_LARGEST and the
    // chunk size does not exceed item_size_max / factor.
    while (++i < POWER_LARGEST && size <= settings.item_size_max / factor) {
        /* Make sure items are always n-byte aligned */
		// CHUNK_ALIGN_BYTES is defined as 8 (original author's note)
        if (size % CHUNK_ALIGN_BYTES)    // round size up to the next multiple of CHUNK_ALIGN_BYTES
            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
        slabclass[i].size = size;
        slabclass[i].perslab = settings.item_size_max / slabclass[i].size;  // number of chunks of this size per slab
        size *= factor;   // the next class is factor times larger
        if (settings.verbose > 1) {
            fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
                    i, slabclass[i].size, slabclass[i].perslab);
    // The last class holds exactly one chunk of the maximum item size.
     power_largest = i; 
    slabclass[power_largest].size = settings.item_size_max;
    slabclass[power_largest].perslab = 1;
    if (settings.verbose > 1) {
        fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
                i, slabclass[i].size, slabclass[i].perslab);
    /* for the test suite:  faking of how much we've already malloc'd */
        char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
        if (t_initial_malloc) {
            mem_malloced = (size_t)atol(t_initial_malloc);
#ifndef DONT_PREALLOC_SLABS  // original author's note: "already defined" -- not verifiable from this excerpt
        char *pre_alloc = getenv("T_MEMD_SLABS_ALLOC");

        if (pre_alloc == NULL || atoi(pre_alloc) != 0) {


slabclass的声明:static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];


<span style="font-family:Microsoft YaHei;font-size:12px;">/*
 * Figures out which slab class (chunk size) is required to store an item of
 * a given size.
  * Given object size, return id to use when allocating/freeing memory for object
 * 0 means error: can't store such a large object
unsigned int slabs_clsid(const size_t size) {
    int res = POWER_SMALLEST;
    if (size == 0)
        return 0;
    while (size > slabclass[res].size)
        if (res++ == power_largest)     /* won't fit in the biggest slab */
            return 0;  //分配的值不能满足
    return res;  //返回第一个大于size的索引值

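根据返回的索引值即可定位到能够容纳该size的slabclass项。从源码中可以看出:chunk的size初始值为sizeof(item)+settings.chunk_size(settings.chunk_size是为key和value预留的最小空间,默认为48),并向上对齐到CHUNK_ALIGN_BYTES的倍数;此后各级chunk的大小依次乘以factor增长,只要size不超过settings.item_size_max / factor就继续创建新的slabclass(factor为默认值1.25时,该上限约为0.8MB);最后一个slabclass的chunk大小固定为settings.item_size_max,即slab的最大值,这也是memcached所能允许分配的最大的item值。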




