您的位置:首页 > 运维架构 > Linux

mips架构linux启动分析(六)(arch_mem_init(node内存)内存初始化)

2017-11-22 09:42 891 查看
这里开始就是各个node的内存的初始化了。

OK,直接看代码.
/*
 * arch_mem_init() - architecture-level memory bring-up during early boot.
 * @cmdline_p: out-parameter; on return *cmdline_p points at command_line.
 *
 * Registers the kernel image with boot_mem_map, assembles the kernel
 * command line, parses early parameters, sets up bootmem (including the
 * initrd and crash-kernel reservations) and finally calls paging_init().
 */
static void __init arch_mem_init(char **cmdline_p)
{
	extern void plat_mem_setup(void);

	/* Platform hook; the variant shown in this file only sets up console/screen info. */
	plat_mem_setup();

	/*
	 * Register the kernel image [_text, _edata) with boot_mem_map as
	 * BOOT_MEM_RAM, rounded outward to whole pages.
	 */
	arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT,
			 PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT,
			 BOOT_MEM_RAM);
	/*
	 * Register the init section as BOOT_MEM_INIT_RAM, rounded inward so
	 * that only pages lying entirely inside [__init_begin, __init_end)
	 * are covered.
	 */
	arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT,
			 PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT,
			 BOOT_MEM_INIT_RAM);

	/* Append " " + builtin_cmdline to arcs_cmdline when a built-in command line exists. */
	if (builtin_cmdline[0]) {
		strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE);
		strlcat(arcs_cmdline, builtin_cmdline, COMMAND_LINE_SIZE);
	}
	/* arcs_cmdline -> boot_command_line -> command_line, each bounded by COMMAND_LINE_SIZE. */
	strlcpy(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE);
	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
	/* Hand the final command line back to the caller. */
	*cmdline_p = command_line;

	/* Parse the early parameters. */
	parse_early_param();

	/* Bootmem setup; in the excerpt shown in this file this validates and reserves the initrd. */
	bootmem_init();

	/* Parse the crash-kernel setting, then reserve its range if it is non-empty. */
	mips_parse_crashkernel();
	if (crashk_res.start != crashk_res.end)
		reserve_bootmem(crashk_res.start,
				crashk_res.end - crashk_res.start + 1,
				BOOTMEM_DEFAULT);

	/* NOTE(review): the original author describes this as an empty function on this platform - confirm. */
	device_tree_init();

	/* NOTE(review): presumably sets up the sparse memory model; the original author was unsure of its role - confirm. */
	sparse_init();

	/* NOTE(review): by its name this sets up the software IO-TLB used for DMA, not the CPU TLB - confirm. */
	plat_swiotlb_setup();

	/* Set up the page tables and the per-node zones. */
	paging_init();
}

定义板级screen的信息:

/*
 * Platform memory-setup hook. In this variant it only selects the console
 * driver and, for the VGA console, fills in the default text-mode
 * screen_info. It does nothing when CONFIG_VT is disabled.
 */
void __init plat_mem_setup(void)
{
#if defined(CONFIG_VT)
# if defined(CONFIG_VGA_CONSOLE)
	/* Default text-mode geometry; every field not listed is zero. */
	const struct screen_info vga_text_defaults = {
		.orig_x			= 0,
		.orig_y			= 25,
		.orig_video_cols	= 80,
		.orig_video_lines	= 25,
		.orig_video_isVGA	= VIDEO_TYPE_VGAC,
		.orig_video_points	= 16,
	};

	conswitchp = &vga_con;
	screen_info = vga_text_defaults;
# elif defined(CONFIG_DUMMY_CONSOLE)
	conswitchp = &dummy_con;
# endif
#endif
}

怎么把一个段加入到boot_mem_map中进行管理呢?

/*
 * Add the physical range [mem, end) to boot_mem_map with the given type,
 * unless the start address @mem already lies inside an existing entry.
 *
 * Only the start address is tested against existing entries; the end
 * address is not.
 */
static void __init arch_mem_addpart(phys_t mem, phys_t end, int type)
{
	int idx;

	for (idx = 0; idx < boot_mem_map.nr_map; idx++) {
		/* Entry starts above mem: mem is not inside it. */
		if (mem < boot_mem_map.map[idx].addr)
			continue;
		/* Entry ends at or below mem: mem is not inside it. */
		if (mem >= (boot_mem_map.map[idx].addr +
			    boot_mem_map.map[idx].size))
			continue;
		/* mem is already covered by this entry; nothing to add. */
		return;
	}

	add_memory_region(mem, end - mem, type);
}

/*
 * add_memory_region() - record a physical memory range in boot_mem_map.
 * @start: physical start address of the range
 * @size:  length of the range in bytes
 * @type:  region type; only entries of the same type are merged
 *
 * If the range overlaps or touches an existing entry of the same type, that
 * entry is grown to cover both and no new entry is created. Only the first
 * matching entry is merged. Otherwise a new entry is appended.
 *
 * NOTE(review): no capacity check on boot_mem_map.map is visible in this
 * excerpt before appending - confirm that the caller or the full source
 * guards against overflowing the table.
 */
void __init add_memory_region(phys_t start, phys_t size, long type)
{
	int i;

	/* Try to merge with an existing entry of the same type. */
	for (i = 0; i < boot_mem_map.nr_map; i++) {
		struct boot_mem_map_entry *entry = boot_mem_map.map + i;
		/*
		 * Must be phys_t: it holds the end of the merged range, and an
		 * unsigned long would truncate it when phys_t is wider than long.
		 */
		phys_t top;

		if (entry->type != type)
			continue;

		/* New range ends strictly before this entry starts. */
		if (start + size < entry->addr)
			continue;			/* no overlap */

		/* This entry ends strictly before the new range starts. */
		if (entry->addr + entry->size < start)
			continue;			/* no overlap */

		/* Overlapping or adjacent: extend the entry to the union of both. */
		top = max(entry->addr + entry->size, start + size);
		entry->addr = min(entry->addr, start);
		entry->size = top - entry->addr;

		return;
	}

	/* Nothing to merge with: append a new entry. */
	boot_mem_map.map[boot_mem_map.nr_map].addr = start;
	boot_mem_map.map[boot_mem_map.nr_map].size = size;
	boot_mem_map.map[boot_mem_map.nr_map].type = type;
	boot_mem_map.nr_map++;
}


bootmem的初始化:

/*
 * Bootmem initialisation as excerpted in this article: validate the initrd
 * description, then reserve the initrd memory (or disable the initrd).
 */
static void __init bootmem_init(void)
{
/* Validate and normalise initrd_start/initrd_end; the return value is ignored here. */
init_initrd();
/* Reserve the initrd range in bootmem, or clear initrd_start/initrd_end if it is unusable. */
finalize_initrd();
}


/*
 * Validate the initrd described by initrd_start/initrd_end.
 *
 * On success both addresses are rewritten via a __pa()/__va() round trip,
 * ROOT_DEV is set to Root_RAM0, and the page frame number just past the end
 * of the initrd is returned. On any validation failure both globals are
 * cleared and 0 is returned.
 */
static unsigned long __init init_initrd(void)
{
	unsigned long end;
	int usable = 0;

	if (initrd_start == 0 || initrd_start >= initrd_end) {
		/* No initrd, or an empty/inverted range: disable silently. */
	} else if ((initrd_start & ~PAGE_MASK) != 0) {
		/* The start address must sit on a page boundary. */
		pr_err("initrd start must be page aligned\n");
	} else if (PAGE_OFFSET > initrd_start) {
		/* The start address must not be below PAGE_OFFSET. */
		pr_err("initrd start < PAGE_OFFSET\n");
	} else {
		usable = 1;
	}

	if (!usable) {
		initrd_start = 0;
		initrd_end = 0;
		return 0;
	}

	/* Physical address of the end of the initrd. */
	end = __pa(initrd_end);
	/* Store both bounds back as __va() of their physical addresses. */
	initrd_end = (unsigned long)__va(end);
	initrd_start = (unsigned long)__va(__pa(initrd_start));

	ROOT_DEV = Root_RAM0;

	/* Page frame number of the initrd end, rounded up. */
	return PFN_UP(end);
}

/*
 * Reserve the initrd memory in bootmem, or disable the initrd.
 *
 * The initrd is disabled (initrd_start and initrd_end cleared) when it is
 * empty or when its end lies beyond PFN_PHYS(max_low_pfn). The first log
 * line in those cases deliberately has no newline; the KERN_CONT message
 * completes it.
 */
static void __init finalize_initrd(void)
{
	unsigned long size = initrd_end - initrd_start;

	if (size != 0 && __pa(initrd_end) <= PFN_PHYS(max_low_pfn)) {
		/* Usable initrd: mark its memory as reserved. */
		reserve_bootmem(__pa(initrd_start), size, BOOTMEM_DEFAULT);
		initrd_below_start_ok = 1;

		pr_info("Initial ramdisk at: 0x%lx (%lu bytes)\n",
			initrd_start, size);
		return;
	}

	if (size == 0)
		printk(KERN_INFO "Initrd not found or empty");
	else
		printk(KERN_ERR "Initrd extends beyond end of memory");

	printk(KERN_CONT " - disabling initrd\n");
	initrd_start = 0;
	initrd_end = 0;
}


把内存设置为reserve内存的方法

#define PFN_UP(x)	(((x)+PAGE_SIZE-1)>>PAGE_SHIFT) // round up: any non-zero in-page offset bumps the page frame number by one
#define PFN_DOWN(x)	((x)>>PAGE_SHIFT)		// round down: discard the in-page offset bits

/*
 * reserve_bootmem() - mark a physical address range as reserved in bootmem.
 * @addr:  physical start address
 * @size:  length in bytes
 * @flags: passed through to mark_bootmem()
 *
 * The byte range is widened to whole pages: the start is rounded down and
 * the end is rounded up. Returns whatever mark_bootmem() returns.
 */
int __init reserve_bootmem(unsigned long addr, unsigned long size,
			   int flags)
{
	return mark_bootmem(PFN_DOWN(addr), PFN_UP(addr + size), 1, flags);
}

/*
 * mark_bootmem() - reserve or free the page frame range [start, end).
 * @start:   first page frame number
 * @end:     page frame number one past the last
 * @reserve: non-zero to reserve the range, zero to free it
 * @flags:   passed through to mark_bootmem_node()
 *
 * Walks bdata_list and applies the operation to each node's part of the
 * range. Returns 0 once the end of the range has been handled. If a
 * reservation fails, the part already reserved is freed again and the error
 * is returned. Reaching the end of the list without covering the range is
 * treated as a bug.
 */
static int __init mark_bootmem(unsigned long start, unsigned long end,
int reserve, int flags)
{
unsigned long pos;
bootmem_data_t *bdata;

pos = start;
/* Walk bdata_list to find the node(s) that contain the range. */
list_for_each_entry(bdata, &bdata_list, list) {
int err;
unsigned long max;

/*
 * Skip nodes that do not contain pos. That is only legal before any
 * part of the range has been processed (pos still equals start).
 */
if (pos < bdata->node_min_pfn ||
pos >= bdata->node_low_pfn) {
BUG_ON(pos != start);
continue;
}

/* Clamp this step to the end of the current node. */
max = min(bdata->node_low_pfn, end);
/* Call mark_bootmem_node() to reserve or free [pos, max) on this node. */
err = mark_bootmem_node(bdata, pos, max, reserve, flags);
if (reserve && err) {
/* Roll back: free the part [start, pos) that was already reserved. */
mark_bootmem(start, pos, 0, 0);
return err;
}

/* The whole range has been handled. */
if (max == end)
return 0;
/* Continue from the first page frame after this node. */
pos = bdata->node_low_pfn;
}
BUG();
}

/*
 * mark_bootmem_node() - reserve or free [start, end) within a single node.
 * @bdata:   bootmem data of the node
 * @start:   first page frame number; must not be below node_min_pfn
 * @end:     page frame number one past the last; must not exceed node_low_pfn
 * @reserve: non-zero to reserve, zero to free
 * @flags:   passed to __reserve()
 *
 * Returns the result of __reserve() when reserving, and 0 when freeing.
 */
static int __init mark_bootmem_node(bootmem_data_t *bdata,
				unsigned long start, unsigned long end,
				int reserve, int flags)
{
	unsigned long first_bit, end_bit;

	bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
		bdata - bootmem_node_data, start, end, reserve, flags);

	/* Sanity check: the range must lie inside this node. */
	BUG_ON(start < bdata->node_min_pfn);
	BUG_ON(end > bdata->node_low_pfn);

	/* Convert page frame numbers to offsets relative to the node's first pfn. */
	first_bit = start - bdata->node_min_pfn;
	end_bit = end - bdata->node_min_pfn;

	if (!reserve) {
		__free(bdata, first_bit, end_bit);
		return 0;
	}

	return __reserve(bdata, first_bit, end_bit, flags);
}

/*
 * __free() - clear the bitmap bits [sidx, eidx) of a node's bootmem map.
 *
 * Lowers bdata->hint_idx to sidx if it is currently above it. Every bit in
 * the range must have been set; finding a clear bit triggers BUG().
 */
static void __init __free(bootmem_data_t *bdata,
			unsigned long sidx, unsigned long eidx)
{
	unsigned long bit = sidx;

	if (sidx < bdata->hint_idx)
		bdata->hint_idx = sidx;

	while (bit < eidx) {
		/* test_and_clear_bit() yields the old value; 0 means it was not set. */
		if (test_and_clear_bit(bit, bdata->node_bootmem_map) == 0)
			BUG();
		bit++;
	}
}

/*
 * __reserve() - set the bitmap bits [sidx, eidx) of a node's bootmem map.
 * @bdata: bootmem data of the node
 * @sidx:  first bit index
 * @eidx:  bit index one past the last
 * @flags: if BOOTMEM_EXCLUSIVE is set, an already-set bit is an error
 *
 * Returns 0 on success. With BOOTMEM_EXCLUSIVE, hitting a bit that is
 * already set frees the bits set so far, [sidx, idx), and returns -EBUSY.
 * Without it, already-set bits are tolerated.
 */
static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
			unsigned long eidx, int flags)
{
	unsigned long idx;
	int exclusive = flags & BOOTMEM_EXCLUSIVE;

	/* test_and_set_bit() sets the bit and returns its previous value. */
	for (idx = sidx; idx < eidx; idx++) {
		if (test_and_set_bit(idx, bdata->node_bootmem_map)) {
			if (exclusive) {
				__free(bdata, sidx, idx);
				return -EBUSY;
			}
		}
	}
	return 0;
}


具体的各个node初始化函数:

/*
 * paging_init() - initialise the page tables and the zones of every node.
 *
 * Raises max_low_pfn to the highest end pfn of any online node, fills in the
 * per-zone limits and passes them to free_area_init_nodes().
 */
void __init paging_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES] = { 0 };
	unsigned nid;

	pagetable_init();

	/* Look up each online node's pfn range and track the highest end pfn. */
	for_each_online_node(nid) {
		unsigned long node_first_pfn, node_end_pfn;

		get_pfn_range_for_nid(nid, &node_first_pfn, &node_end_pfn);

		if (max_low_pfn < node_end_pfn)
			max_low_pfn = node_end_pfn;
	}

#ifdef CONFIG_ZONE_DMA32
	/* Limit for ZONE_DMA32. */
	zones_size[ZONE_DMA32] = MAX_DMA32_PFN;
#endif
	/* Limit for ZONE_NORMAL. */
	zones_size[ZONE_NORMAL] = max_low_pfn;

	/* Initialise the zones of all nodes from these limits. */
	free_area_init_nodes(zones_size);
}

/*
 * free_area_init_nodes() - compute zone boundaries and initialise all nodes.
 * @max_zone_pfn: array indexed by zone, holding the highest pfn of each zone
 *
 * Fills arch_zone_lowest_possible_pfn[] and arch_zone_highest_possible_pfn[]
 * so that each zone starts where the previous one ends, then calls
 * free_area_init_node() for every online node.
 */
void __init free_area_init_nodes(unsigned long *max_zone_pfn)
{
/* NOTE(review): start_pfn and end_pfn are not used in this excerpt. */
unsigned long start_pfn, end_pfn;
int i, nid;

/*
 * arch_zone_lowest_possible_pfn holds the start pfn of each zone and
 * arch_zone_highest_possible_pfn holds the end pfn of each zone.
 * Both are zeroed here first.
 */
memset(arch_zone_lowest_possible_pfn, 0,
sizeof(arch_zone_lowest_possible_pfn));
memset(arch_zone_highest_possible_pfn, 0,
sizeof(arch_zone_highest_possible_pfn));
/* Zone 0 starts at the lowest pfn reported by find_min_pfn_with_active_regions(). */
arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
/* Zone 0 ends at the limit passed in by the caller. */
arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
for (i = 1; i < MAX_NR_ZONES; i++) {
/* ZONE_MOVABLE is handled separately below. */
if (i == ZONE_MOVABLE)
continue;
/* Each zone starts where the previous zone ends. */
arch_zone_lowest_possible_pfn[i] =
arch_zone_highest_possible_pfn[i-1];
/* The end is never below the start, so a zone may be empty but not inverted. */
arch_zone_highest_possible_pfn[i] =
max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
}
arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;

/* Find the PFNs that ZONE_MOVABLE begins at in each node */
memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
find_zone_movable_pfns_for_nodes();
/* Call free_area_init_node() for every online node. */
/* Initialise every node */
mminit_verify_pageflags_layout();
setup_nr_node_ids();
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
free_area_init_node(nid, NULL,
find_min_pfn_for_node(nid), NULL);

/* Any memory on that node */
if (pgdat->node_present_pages)
node_set_state(nid, N_MEMORY);
check_for_memory(pgdat, nid);
}
}


看一下是怎么初始化每个node的内存区域的:

/*
 * free_area_init_node() - initialise the memory description of one node.
 * @nid:            node id
 * @zones_size:     passed through to calculate_node_totalpages()
 * @node_start_pfn: first page frame number of the node
 * @zholes_size:    passed through to calculate_node_totalpages()
 */
void __init_refok free_area_init_node(int nid, unsigned long *zones_size,
		unsigned long node_start_pfn, unsigned long *zholes_size)
{
	unsigned long range_end = 0;
	unsigned long range_start = 0;
	pg_data_t *pgdat = NODE_DATA(nid);

	reset_deferred_meminit(pgdat);

	/* Record the node id and its first page frame number. */
	pgdat->node_id = nid;
	pgdat->node_start_pfn = node_start_pfn;

	/* Look up the pfn range that belongs to this node. */
	get_pfn_range_for_nid(nid, &range_start, &range_end);

	/* Work out the node's page totals from that range. */
	calculate_node_totalpages(pgdat, range_start, range_end,
				  zones_size, zholes_size);

	/* NOTE(review): the original author notes this is empty when a flat node mem map is not configured - confirm. */
	alloc_node_mem_map(pgdat);

	/* Core step: initialise the zones of this node. */
	free_area_init_core(pgdat, range_start, range_end);
}


zone的结构体:
/*
 * Per-zone state, as excerpted in this article. Field notes are translated
 * from the original author's annotations.
 */
struct zone {
unsigned long watermark[NR_WMARK]; // watermark settings; per the original note, used during memory reclaim
unsigned long percpu_drift_mark;
unsigned long lowmem_reserve[MAX_NR_ZONES];
unsigned long dirty_balance_reserve;

#ifdef CONFIG_NUMA
int node; // node id
unsigned long min_unmapped_pages;
unsigned long min_slab_pages;
#endif
struct per_cpu_pageset __percpu *pageset;
spinlock_t lock;
int all_unreclaimable; /* All pages pinned */
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
bool compact_blockskip_flush;

unsigned long compact_cached_free_pfn;
unsigned long compact_cached_migrate_pfn;
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
seqlock_t span_seqlock;
#endif
struct free_area free_area[MAX_ORDER];

#ifndef CONFIG_SPARSEMEM
unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

#ifdef CONFIG_COMPACTION
unsigned int compact_considered;
unsigned int compact_defer_shift;
int compact_order_failed;
#endif

ZONE_PADDING(_pad1_)

spinlock_t lru_lock;
struct lruvec lruvec;

atomic_long_t inactive_age;

unsigned long pages_scanned; /* since last reclaim */
unsigned long flags; /*zone flags*/
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];

unsigned int inactive_ratio;

ZONE_PADDING(_pad2_)
// wait queue table
wait_queue_head_t * wait_table;
unsigned long wait_table_hash_nr_entries; // number of wait table entries
unsigned long wait_table_bits;

struct pglist_data *zone_pgdat; // the pgdat this zone belongs to

unsigned long zone_start_pfn; // first page frame number of the zone

unsigned long spanned_pages; // total size, including holes
unsigned long present_pages; // size excluding holes
unsigned long managed_pages;

int nr_migrate_reserve_block;
const char *name; // zone name

} ____cacheline_internodealigned_in_smp;

zone结构的初始化(这里有很多初始化操作,对其作用还不是很了解,只要先知道这里是在初始化zone就好了;等了解了内存回收机制后,对这里的作用就清楚了)

/*
 * free_area_init_core() - initialise the pgdat and every zone of one node.
 * @pgdat:          the node's pglist_data
 * @node_start_pfn: first page frame number of the node (not used in this body)
 * @node_end_pfn:   end page frame number of the node (not used in this body)
 *
 * Many of the zone members set up here are, per the original author's note,
 * used later by memory reclaim. For each zone the function accounts for the
 * pages consumed by the memmap and by dma_reserve, updates the global
 * nr_kernel_pages / nr_all_pages counters, initialises the zone's locks and
 * bookkeeping, and - for non-empty zones - sets up the usemap, the free
 * lists and the memmap.
 */
static void __paginginit free_area_init_core(struct pglist_data *pgdat,
		unsigned long node_start_pfn, unsigned long node_end_pfn)
{
	enum zone_type j;
	int nid = pgdat->node_id;
	int ret;

	/* Initialise the node-level (pgdat) members. */
	pgdat_resize_init(pgdat);
#ifdef CONFIG_NUMA_BALANCING
	spin_lock_init(&pgdat->numabalancing_migrate_lock);
	pgdat->numabalancing_migrate_nr_pages = 0;
	pgdat->numabalancing_migrate_next_window = jiffies;
#endif
	init_waitqueue_head(&pgdat->kswapd_wait);
	init_waitqueue_head(&pgdat->pfmemalloc_wait);
	pgdat_page_cgroup_init(pgdat);

	/* Update the information of every zone in this node. */
	for (j = 0; j < REAL_MAX_ZONES; j++) {
		struct zone *zone = pgdat->node_zones + j;
		unsigned long size, realsize, freesize, memmap_pages;
		unsigned long zone_start_pfn;

		zone_start_pfn = zone->zone_start_pfn;

		/* size spans holes; realsize/freesize start from the present pages. */
		size = zone->spanned_pages;
		realsize = freesize = zone->present_pages;

		/* Subtract the pages needed for the memmap, if they fit. */
		memmap_pages = calc_memmap_size(size, realsize);
		if (freesize >= memmap_pages) {
			freesize -= memmap_pages;
			if (memmap_pages)
				printk(KERN_DEBUG
				       "%s zone: %lu pages used for memmap\n",
				       zone_names[j], memmap_pages);
		} else
			printk(KERN_WARNING
				"%s zone: %lu pages exceeds freesize %lu\n",
				zone_names[j], memmap_pages, freesize);

		/* Account for reserved pages */
		if (j == 0 && freesize > dma_reserve) {
			freesize -= dma_reserve;
			printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
					zone_names[0], dma_reserve);
		}
		if (!is_highmem_idx(j))
			nr_kernel_pages += freesize;
		else if (nr_kernel_pages > memmap_pages * 2)
			nr_kernel_pages -= memmap_pages;
		nr_all_pages += freesize;

		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
#ifdef CONFIG_NUMA
		zone->node = nid;
		zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)/100;
		zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
#endif
		/* Record the zone's name. */
		zone->name = zone_names[j];
		/* Initialise the zone's locks. */
		spin_lock_init(&zone->lock);
		spin_lock_init(&zone->lru_lock);
		zone_seqlock_init(zone);
		/* Point the zone back at the pgdat it belongs to. */
		zone->zone_pgdat = pgdat;
		zone_pcp_init(zone);

		/* For bootup, initialized properly in watermark setup */
		mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);

		lruvec_init(&zone->lruvec);
		/* Nothing more to set up for a zone that spans no pages. */
		if (!size)
			continue;

		set_pageblock_order();
		setup_usemap(pgdat, zone, zone_start_pfn, size);
		ret = init_currently_empty_zone(zone, zone_start_pfn,
						size, MEMMAP_EARLY);
		BUG_ON(ret);
		memmap_init(size, nid, j, zone_start_pfn);
	}
}


好了,现在node中的zone就填写完毕了。

OK,运行到这里,每个node对应的内存区域就已经初始化完毕了。

下面还有几个板级相关的点,记录一下,比较重要,需要进行分析:

1,paging_init中的pagetable_init函数,这是初始化pgd,pud,pmd,pte的表项,也就是虚拟地址到物理地址的转换

2,arch_mem_init函数中的sparse_init,功能是初始化一段非线性区域,具体作用还不是很清楚(有待进一步分析)

3,arch_mem_init函数中的plat_swiotlb_setup函数, 这是关于DMA操作,涉及到具体板卡的地址映射问题.

4,setup_arch函数中的plat_smp_setup,是mips多核cpu之间的IPI初始化,也就是核间互连寄存器的初始化(手册说的比较少,单看代码也不了解具体功能)

5,setup_arch函数中的cpu_cache_init函数,这是cpu的cache初始化。

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐