
Kernel Boot Flow Source Code Analysis, Part 8: mm_init

2016-11-22 19:18
1 mm_init

1.0 mm_init

Defined in init/main.c.

static void __init mm_init(void)
{
    /*
     * page_cgroup requires contiguous pages,
     * bigger than MAX_ORDER unless SPARSEMEM.
     */
    page_cgroup_init_flatmem(); // mem_cgroup_disabled() is true here, so this returns immediately
    mem_init(); // switch from the memblock boot allocator to the buddy allocator
    kmem_cache_init(); // initialize kmem_cache and kmalloc_caches, enabling the slab allocator
    percpu_init_late(); // finish per-CPU variable setup
    pgtable_cache_init(); // on arm64: #define pgtable_cache_init() do { } while (0)
    vmalloc_init(); // set up vmalloc bookkeeping (see 1.4)
}
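
After mm_init() returns, both the buddy and slab allocators are live, so ordinary kernel allocations work from this point on. A minimal sketch of what becomes possible (illustrative only, not part of the boot code; alloc_demo is a made-up name):

#include <linux/gfp.h>
#include <linux/slab.h>

static void alloc_demo(void)
{
    struct page *pg = alloc_pages(GFP_KERNEL, 0); /* one page from the buddy allocator */
    void *buf = kmalloc(128, GFP_KERNEL);         /* 128 bytes from the slab allocator */

    if (buf)
        kfree(buf);
    if (pg)
        __free_pages(pg, 0);
}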

1.1 mem_init

void __init mem_init(void)
{
    unsigned long reserved_pages, free_pages;
    struct memblock_region *reg;

    arm64_swiotlb_init(); // initialize the software IO TLB (SWIOTLB) bounce buffers used by the DMA API

    /*
     * max_pfn is the highest page frame number of physical memory and
     * PHYS_PFN_OFFSET is the page frame number of its start address;
     * subtracting the lowest page pointer (mem_map) from the highest
     * leaves the system's total number of memory pages in max_mapnr.
     */
    max_mapnr   = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;

#ifndef CONFIG_SPARSEMEM_VMEMMAP
    /* this will put all unused low memory onto the freelists */
    free_unused_memmap();
#endif

    totalram_pages += free_all_bootmem(); // release all free pages to the buddy allocator

    reserved_pages = free_pages = 0;

    for_each_memblock(memory, reg) { // walk memblock.memory and count free and reserved pages
        unsigned int pfn1, pfn2;
        struct page *page, *end;

        pfn1 = __phys_to_pfn(reg->base);
        pfn2 = pfn1 + __phys_to_pfn(reg->size);

        page = pfn_to_page(pfn1);
        end  = pfn_to_page(pfn2 - 1) + 1;

        do {
            if (PageReserved(page))
                reserved_pages++;
            else if (!page_count(page))
                free_pages++;
            page++;
        } while (page < end);
    }

    /*
     * Since our memory may not be contiguous, calculate the real number
     * of pages we have in this system.
     */
    pr_info("Memory:");
    num_physpages = 0;
    for_each_memblock(memory, reg) { // compute the amount of memory in the system, in pages
        unsigned long pages = memblock_region_memory_end_pfn(reg) -
            memblock_region_memory_base_pfn(reg);
        num_physpages += pages;
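        /*
         * pages >> (20 - PAGE_SHIFT) converts a page count to megabytes:
         * with 4 KB pages (PAGE_SHIFT == 12) there are 2^8 = 256 pages
         * per MB, so the shift divides by 256.
         */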
        printk(" %ldMB", pages >> (20 - PAGE_SHIFT));
    }
    printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));

    pr_notice("Memory: %luk/%luk available, %luk reserved\n",
          nr_free_pages() << (PAGE_SHIFT-10),
          free_pages << (PAGE_SHIFT-10),
          reserved_pages << (PAGE_SHIFT-10));

#define MLK(b, t) b, t, ((t) - (b)) >> 10
#define MLM(b, t) b, t, ((t) - (b)) >> 20
#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
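// Each helper expands to "base, top, size": MLM reports the size in MB,
// MLK in KB, and MLK_ROUNDUP in KB rounded up with DIV_ROUND_UP.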

    pr_notice("Virtual kernel memory layout:\n"
          "    vmalloc : 0x%16lx - 0x%16lx   (%6ld MB)\n" // 打印vmalloc映射区信息
#ifdef CONFIG_SPARSEMEM_VMEMMAP
          "    vmemmap : 0x%16lx - 0x%16lx   (%6ld MB)\n" // 打印vmemmap映射区信息
#endif
          "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n" // 打印内核模块映射区信息
          "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n" // 打印直接映射区信息
          "      .init : 0x%p" " - 0x%p" "   (%6ld kB)\n" // 打印内核init段信息
          "      .text : 0x%p" " - 0x%p" "   (%6ld kB)\n" // 打印内核程序段信息
          "      .data : 0x%p" " - 0x%p" "   (%6ld kB)\n", // 打印内核数据段信息
          MLM(VMALLOC_START, VMALLOC_END),
#ifdef CONFIG_SPARSEMEM_VMEMMAP
          MLM((unsigned long)virt_to_page(PAGE_OFFSET),
              (unsigned long)virt_to_page(high_memory)),
#endif
          MLM(MODULES_VADDR, MODULES_END),
          MLM(PAGE_OFFSET, (unsigned long)high_memory),

          MLK_ROUNDUP(__init_begin, __init_end),
          MLK_ROUNDUP(_text, _etext),
          MLK_ROUNDUP(_sdata, _edata));

#undef MLK
#undef MLM
#undef MLK_ROUNDUP

    /*
     * Check boundaries twice: Some fundamental inconsistencies can be
     * detected at build time already.
     */
#ifdef CONFIG_COMPAT
    BUILD_BUG_ON(TASK_SIZE_32            > TASK_SIZE_64);
#endif
    BUILD_BUG_ON(TASK_SIZE_64            > MODULES_VADDR);
    BUG_ON(TASK_SIZE_64                > MODULES_VADDR);

    if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
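        /* e.g. with 16 KB pages, 128 pages is only 2 MB of RAM */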
        extern int sysctl_overcommit_memory;
        /*
         * On a machine this small we won't get anywhere without
         * overcommit, so turn it on by default.
         */
        sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
    }
}

1.2 kmem_cache_init

void __init kmem_cache_init(void)
{
    int i;

    kmem_cache = &kmem_cache_boot; // kmem_cache是第一个高速缓存
    setup_node_pointer(kmem_cache); // kmem_cache的(struct kmem_cache_node)node指向(struct array_cache)array

    if (num_possible_nodes() == 1)
        use_alien_caches = 0;

    for (i = 0; i < NUM_INIT_LISTS; i++)
        kmem_cache_node_init(&init_kmem_cache_node[i]); // initialize the static init_kmem_cache_node entries

    set_up_node(kmem_cache, CACHE_CACHE); // hook kmem_cache up to its bootstrap kmem_cache_node

    /*
     * Fragmentation resistance on low memory - only use bigger
     * page orders on machines with more than 32MB of memory if
     * not overridden on the command line.
     */
    if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
        slab_max_order = SLAB_MAX_ORDER_HI;

    /* Bootstrap is tricky, because several objects are allocated
     * from caches that do not exist yet:
     * 1) initialize the kmem_cache cache: it contains the struct
     *    kmem_cache structures of all caches, except kmem_cache itself:
     *    kmem_cache is statically allocated.
     *    Initially an __init data area is used for the head array and the
     *    kmem_cache_node structures, it's replaced with a kmalloc allocated
     *    array at the end of the bootstrap.
     * 2) Create the first kmalloc cache.
     *    The struct kmem_cache for the new cache is allocated normally.
     *    An __init data area is used for the head array.
     * 3) Create the remaining kmalloc caches, with minimally sized
     *    head arrays.
     * 4) Replace the __init data head arrays for kmem_cache and the first
     *    kmalloc cache with kmalloc allocated arrays.
     * 5) Replace the __init data for kmem_cache_node for kmem_cache and
     *    the other cache's with kmalloc allocated memory.
     * 6) Resize the head arrays of the kmalloc caches to their final sizes.
     */

    /* 1) create the kmem_cache */

    /*
     * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
     */
    create_boot_cache(kmem_cache, "kmem_cache",
        offsetof(struct kmem_cache, array[nr_cpu_ids]) +
                  nr_node_ids * sizeof(struct kmem_cache_node *),
                  SLAB_HWCACHE_ALIGN); // note: this does not reallocate kmem_cache_boot; the static struct is initialized in place, with its object size computed from nr_cpu_ids and nr_node_ids
    list_add(&kmem_cache->list, &slab_caches); // link kmem_cache into the global slab_caches list

    /* 2+3) create the kmalloc caches */

    /*
     * Initialize the caches that provide memory for the array cache and the
     * kmem_cache_node structures first.  Without this, further allocations will
     * bug.
     */

    kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
                    kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS); // create the kmalloc-ac cache, sized to hold struct arraycache_init

    if (INDEX_AC != INDEX_NODE)
        kmalloc_caches[INDEX_NODE] =
            create_kmalloc_cache("kmalloc-node",
                kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS); // create the kmalloc-node cache, sized to hold struct kmem_cache_node

    slab_early_init = 0;

    /* 4) Replace the bootstrap head arrays */
    {
        struct array_cache *ptr;

        ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); // allocate an arraycache_init from the freshly created slab

        memcpy(ptr, cpu_cache_get(kmem_cache),
               sizeof(struct arraycache_init)); // copy kmem_cache's current-CPU array_cache into the new allocation
        /*
         * Do not assume that spinlocks can be initialized via memcpy:
         */
        spin_lock_init(&ptr->lock); // initialize the new array cache's spinlock

        kmem_cache->array[smp_processor_id()] = ptr; // point kmem_cache's current-CPU array at the new allocation

        ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); // allocate another arraycache_init

        BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC])
               != &initarray_generic.cache);
        memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
               sizeof(struct arraycache_init)); // copy the kmalloc-ac cache's current-CPU array_cache into the new allocation
        /*
         * Do not assume that spinlocks can be initialized via memcpy:
         */
        spin_lock_init(&ptr->lock); // initialize the new array cache's spinlock

        kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr; // point the kmalloc-ac cache's current-CPU array at the new allocation
    }
    /* 5) Replace the bootstrap kmem_cache_node */
    {
        int nid;

        for_each_online_node(nid) { // for each online NUMA node
            init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid); // kmalloc a kmem_cache_node, copy the static init_kmem_cache_node entry into it, and point kmem_cache's node at the new allocation

            init_list(kmalloc_caches[INDEX_AC],
                  &init_kmem_cache_node[SIZE_AC + nid], nid); // same replacement for the kmalloc-ac cache's node

            if (INDEX_AC != INDEX_NODE) {
                init_list(kmalloc_caches[INDEX_NODE],
                      &init_kmem_cache_node[SIZE_NODE + nid], nid); // and for the kmalloc-node cache, if distinct
            }
        }
        }
    }

    create_kmalloc_caches(ARCH_KMALLOC_FLAGS); // create the remaining entries of the kmalloc_caches array
}

static struct kmem_cache kmem_cache_boot = {
    .batchcount = 1,
    .limit = BOOT_CPUCACHE_ENTRIES,
    .shared = 1,
    .size = sizeof(struct kmem_cache),
    .name = "kmem_cache",
};
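
Once kmem_cache_init() completes, the slab API is available to the rest of the kernel. A minimal usage sketch (illustrative only; struct foo, foo_cache and foo_setup are made-up names):

#include <linux/errno.h>
#include <linux/slab.h>

struct foo {
    int a;
    int b;
};

static struct kmem_cache *foo_cache;

static int foo_setup(void)
{
    struct foo *f;

    /* create a dedicated cache for struct foo objects */
    foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
                                  0, SLAB_HWCACHE_ALIGN, NULL);
    if (!foo_cache)
        return -ENOMEM;

    /* allocate one object from the cache, then return it */
    f = kmem_cache_alloc(foo_cache, GFP_KERNEL);
    if (f)
        kmem_cache_free(foo_cache, f);
    return 0;
}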

1.3 percpu_init_late

void __init percpu_init_late(void)
{
    struct pcpu_chunk *target_chunks[] =
        { pcpu_first_chunk, pcpu_reserved_chunk, NULL };
    struct pcpu_chunk *chunk;
    unsigned long flags;
    int i;

    for (i = 0; (chunk = target_chunks[i]); i++) {
        int *map;
        const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);

        BUILD_BUG_ON(size > PAGE_SIZE);

        map = pcpu_mem_zalloc(size); // allocate a new allocation map, now that the slab allocator works
        BUG_ON(!map);

        spin_lock_irqsave(&pcpu_lock, flags);
        memcpy(map, chunk->map, size); // copy the statically allocated bootstrap map
        chunk->map = map; // switch the chunk over to the new map
        spin_unlock_irqrestore(&pcpu_lock, flags);
    }
}
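
percpu_init_late() swaps the bootstrap allocation maps of the first (and, if present, reserved) per-CPU chunks for slab-allocated copies, since the early maps live in init memory. After this the regular per-CPU API is fully usable; a minimal sketch (illustrative only; demo_counter and bump_counter are made-up names):

#include <linux/percpu.h>

static DEFINE_PER_CPU(int, demo_counter);

static void bump_counter(void)
{
    int *p = get_cpu_ptr(&demo_counter); /* disables preemption */
    (*p)++;
    put_cpu_ptr(&demo_counter);          /* re-enables preemption */
}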

1.4 vmalloc_init

Memory allocated with vmalloc is contiguous in virtual address space, but the backing physical pages need not be contiguous.
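
For example (an illustrative sketch, not from the boot path; vmalloc_demo is a made-up name):

#include <linux/vmalloc.h>

static void vmalloc_demo(void)
{
    /* 4 MB that is virtually contiguous; the backing pages may be scattered */
    void *buf = vmalloc(4 * 1024 * 1024);

    if (buf)
        vfree(buf);
}

vmalloc_init() sets up the bookkeeping that makes such allocations possible: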

void __init vmalloc_init(void)
{
    struct vmap_area *va;
    struct vm_struct *tmp;
    int i;

    for_each_possible_cpu(i) {
        struct vmap_block_queue *vbq;
        struct vfree_deferred *p;
        // initialize this CPU's vmap_block_queue
        vbq = &per_cpu(vmap_block_queue, i);
        spin_lock_init(&vbq->lock);
        INIT_LIST_HEAD(&vbq->free);
        // initialize vfree_deferred, used to defer freeing of vmalloc'd memory
        p = &per_cpu(vfree_deferred, i);
        init_llist_head(&p->list);
        INIT_WORK(&p->wq, free_work);
    }

    /* Import existing vmlist entries. */
    for (tmp = vmlist; tmp; tmp = tmp->next) { // wrap each existing vmlist entry in a vmap_area and insert it into the vmap_area_root red-black tree
        va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
        va->flags = VM_VM_AREA;
        va->va_start = (unsigned long)tmp->addr;
        va->va_end = va->va_start + tmp->size;
        va->vm = tmp;
        __insert_vmap_area(va);
    }

    vmap_area_pcpu_hole = VMALLOC_END;

    vmap_initialized = true; // vmap bookkeeping is now ready for use
}