您的位置:首页 > 运维架构 > Linux

Linux 内存管理(4)paging_init(2)

2014-11-21 16:03 399 查看

1. bootmem_init

内核代码像是一个美丽的女人,把最美丽的地方放在最神秘的位置。在跋山涉水之后,终于到了揭开面纱的时刻。

arch/arm64/mm/init.c

180 void __init bootmem_init(void)

181 {

182 unsigned long min, max;

183

184 min = PFN_UP(memblock_start_of_DRAM());

185 max = PFN_DOWN(memblock_end_of_DRAM());

186

187 /*

188 * Sparsemem tries to allocate bootmem in memory_present(), so must be

189 * done after the fixed reservations.

190 */

191 arm64_memory_present();

192

193 sparse_init();

194 zone_sizes_init(min, max);

195

196 high_memory = __va((max << PAGE_SHIFT) - 1) + 1;

197 max_pfn = max_low_pfn = max;

198 }

199

bootmem_init的主要功能,由191、193、194行的三个函数实现。

CONFIG_SPARSEMEM在内核编译中有设置。

124 static void arm64_memory_present(void)

125 {

126 struct memblock_region *reg;

127

128 for_each_memblock(memory, reg)

129 memory_present(0, memblock_region_memory_base_pfn(reg),

130 memblock_region_memory_end_pfn(reg));

131 }

mm/sparse.c

/* Record a memory area against a node. */

void __init memory_present(int nid, unsigned long start, unsigned long end)

{

unsigned long pfn;

start &= PAGE_SECTION_MASK;

mminit_validate_memmodel_limits(&start, &end);

for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {

unsigned long section = pfn_to_section_nr(pfn);

struct mem_section *ms;

sparse_index_init(section, nid);

set_section_nid(section, nid);

ms = __nr_to_section(section);

if (!ms->section_mem_map)

ms->section_mem_map = sparse_encode_early_nid(nid) |

SECTION_MARKED_PRESENT;

}

}

2. sparse_init

468 void __init sparse_init(void)

469 {

579

580 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {

581 if (!present_section_nr(pnum))

582 continue;

583

584 usemap = usemap_map[pnum];

585 if (!usemap)

586 continue;

587

588 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER

589 map = map_map[pnum];

590 #else

591 map = sparse_early_mem_map_alloc(pnum);

592 #endif

593 if (!map)

594 continue;

595

596 sparse_init_one_section(__nr_to_section(pnum), pnum, map,

597 usemap);

598 }

599

600 vmemmap_populate_print_last();

601

602 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER

603 free_bootmem(__pa(map_map), size2);

604 #endif

605 free_bootmem(__pa(usemap_map), size);

606 }

607

sparse_init是一个相当复杂的函数,591行分配mem_map.

443 static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)

444 {

445 struct page *map;

446 struct mem_section *ms = __nr_to_section(pnum);

447 int nid = sparse_early_nid(ms);

448

449 map = sparse_mem_map_populate(pnum, nid);

450 if (map)

451 return map;

452

453 printk(KERN_ERR "%s: sparsemem memory map backing failed "

454 "some memory will not be available.\n", __func__);

455 ms->section_mem_map = 0;

456 return NULL;

457 }

mm/sparse-vmemmap.c

struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)

{

unsigned long start;

unsigned long end;

struct page *map;

map = pfn_to_page(pnum * PAGES_PER_SECTION);

start = (unsigned long)map;

end = (unsigned long)(map + PAGES_PER_SECTION);

if (vmemmap_populate(start, end, nid))

return NULL;

return map;

}

arch/arm64/mm/mmu.c

399 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)

400 {

401 unsigned long addr = start;

402 unsigned long next;

403 pgd_t *pgd;

404 pud_t *pud;

405 pmd_t *pmd;

406

407 do {

408 next = pmd_addr_end(addr, end);

409

410 pgd = vmemmap_pgd_populate(addr, node);

411 if (!pgd)

412 return -ENOMEM;

413

414 pud = vmemmap_pud_populate(pgd, addr, node);

415 if (!pud)

416 return -ENOMEM;

417

418 pmd = pmd_offset(pud, addr);

419 if (pmd_none(*pmd)) {

420 void *p = NULL;

421

422 p = vmemmap_alloc_block_buf(PMD_SIZE, node);

423 if (!p)

424 return -ENOMEM;

425

426 set_pmd(pmd, __pmd(__pa(p) | prot_sect_kernel));

427 } else

428 vmemmap_verify((pte_t *)pmd, node, addr, next);

429 } while (addr = next, addr != end);

430

431 return 0;

432 }

422行分配一块PMD_SIZE大小(4K页配置下为2MB)的物理内存,426行把它的物理地址填入PMD,建立section映射。

49 void * __meminit vmemmap_alloc_block(unsigned long size, int node)

50 {

51 /* If the main allocator is up use that, fallback to bootmem. */

52 if (slab_is_available()) {

53 struct page *page;

54

55 if (node_state(node, N_HIGH_MEMORY))

56 page = alloc_pages_node(

57 node, GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,

58 get_order(size));

59 else

60 page = alloc_pages(

61 GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,

62 get_order(size));

63 if (page)

64 return page_address(page);

65 return NULL;

66 } else

67 return __earlyonly_bootmem_alloc(node, size, size,

68 __pa(MAX_DMA_ADDRESS));

69 }

70

71 /* need to make sure size is all the same during early stage */

72 void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)

73 {

74 void *ptr;

75

76 if (!vmemmap_buf)

77 return vmemmap_alloc_block(size, node);

78

79 /* take the from buf */

80 ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);

81 if (ptr + size > vmemmap_buf_end)

82 return vmemmap_alloc_block(size, node);

83

84 vmemmap_buf = ptr + size;

85

86 return ptr;

87 }

38 static void * __init_refok __earlyonly_bootmem_alloc(int node,

39 unsigned long size,

40 unsigned long align,

41 unsigned long goal)

42 {

43 return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);

44 }

如果你对这个内存分配十分感兴趣,在mm/nobootmem.c中可以找到函数定义,最终会调用到memblock_alloc。

sparse_init完成后,每个present的物理页面都有了对应的struct page描述符(mem_map),并在vmemmap区域建立了相应的页表映射。

3. zone_sizes_init

arch/arm64/mm/init.c

72 static void __init zone_sizes_init(unsigned long min, unsigned long max)

73 {

74 struct memblock_region *reg;

75 unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];

76 unsigned long max_dma32 = min;

77

78 memset(zone_size, 0, sizeof(zone_size));

79

80 #ifdef CONFIG_ZONE_DMA32

81 /* 4GB maximum for 32-bit only capable devices */

82 max_dma32 = max(min, min(max, MAX_DMA32_PFN));

83 zone_size[ZONE_DMA32] = max_dma32 - min;

84 #endif

85 zone_size[ZONE_NORMAL] = max - max_dma32;

86

87 memcpy(zhole_size, zone_size, sizeof(zhole_size));

88

89 for_each_memblock(memory, reg) {

90 unsigned long start = memblock_region_memory_base_pfn(reg);

91 unsigned long end = memblock_region_memory_end_pfn(reg);

92

93 if (start >= max)

94 continue;

95 #ifdef CONFIG_ZONE_DMA32

96 if (start < max_dma32) {

97 unsigned long dma_end = min(end, max_dma32);

98 zhole_size[ZONE_DMA32] -= dma_end - start;

99 }

100 #endif

101 if (end > max_dma32) {

102 unsigned long normal_end = min(end, max);

103 unsigned long normal_start = max(start, max_dma32);

104 zhole_size[ZONE_NORMAL] -= normal_end - normal_start;

105 }

106 }

107

108 free_area_init_node(0, zone_size, min, zhole_size);

109 }

110

74-106行,主要是计算zone_size和zhole_size两个数组,108行将其传给free_area_init_node。

mm/page_alloc.c

4887 void __paginginit free_area_init_node(int nid, unsigned long *zones_size,

4888 unsigned long node_start_pfn, unsigned long *zholes_size)

4889 {

4890 pg_data_t *pgdat = NODE_DATA(nid);

4891

4892 /* pg_data_t should be reset to zero when it's allocated */

4893 WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);

4894

4895 pgdat->node_id = nid;

4896 pgdat->node_start_pfn = node_start_pfn;

4897 init_zone_allows_reclaim(nid);

4898 calculate_node_totalpages(pgdat, zones_size, zholes_size);

4899

4900 alloc_node_mem_map(pgdat);

4901 #ifdef CONFIG_FLAT_NODE_MEM_MAP

4902 printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",

4903 nid, (unsigned long)pgdat,

4904 (unsigned long)pgdat->node_mem_map);

4905 #endif

4906

4907 free_area_init_core(pgdat, zones_size, zholes_size);

4908 }

4890-4905行对Node的pgdat的部分成员进行设置,4907行的free_area_init_core完成主要工作。

4754 static void __paginginit free_area_init_core(struct pglist_data *pgdat,

4755 unsigned long *zones_size, unsigned long *zholes_size)

4756 {

4757 enum zone_type j;

4758 int nid = pgdat->node_id;

4759 unsigned long zone_start_pfn = pgdat->node_start_pfn;

4760 int ret;

4761

4762 pgdat_resize_init(pgdat);

4763 #ifdef CONFIG_NUMA_BALANCING

4764 spin_lock_init(&pgdat->numabalancing_migrate_lock);

4765 pgdat->numabalancing_migrate_nr_pages = 0;

4766 pgdat->numabalancing_migrate_next_window = jiffies;

4767 #endif

4768 init_waitqueue_head(&pgdat->kswapd_wait);

4769 init_waitqueue_head(&pgdat->pfmemalloc_wait);

4770 pgdat_page_cgroup_init(pgdat);

4771

4772 for (j = 0; j < MAX_NR_ZONES; j++) {

4773 struct zone *zone = pgdat->node_zones + j;

4774 unsigned long size, realsize, freesize, memmap_pages;

4775

4776 size = zone_spanned_pages_in_node(nid, j, zones_size);

4777 realsize = freesize = size - zone_absent_pages_in_node(nid, j,

4778 zholes_size);

4779

4780 /*

4781 * Adjust freesize so that it accounts for how much memory

4782 * is used by this zone for memmap. This affects the watermark

4783 * and per-cpu initialisations

4784 */

4785 memmap_pages = calc_memmap_size(size, realsize);

4786 if (freesize >= memmap_pages) {

4787 freesize -= memmap_pages;

4788 if (memmap_pages)

4789 printk(KERN_DEBUG

4790 " %s zone: %lu pages used for memmap\n",

4791 zone_names[j], memmap_pages);

4792 } else

4793 printk(KERN_WARNING

4794 " %s zone: %lu pages exceeds freesize %lu\n",

4795 zone_names[j], memmap_pages, freesize);

4796

4797 /* Account for reserved pages */

4798 if (j == 0 && freesize > dma_reserve) {

4799 freesize -= dma_reserve;

4800 printk(KERN_DEBUG " %s zone: %lu pages reserved\n",

4801 zone_names[0], dma_reserve);

4802 }

4803

4804 if (!is_highmem_idx(j))

4805 nr_kernel_pages += freesize;

4806 /* Charge for highmem memmap if there are enough kernel pages */

4807 else if (nr_kernel_pages > memmap_pages * 2)

4808 nr_kernel_pages -= memmap_pages;

4809 nr_all_pages += freesize;

4810

4811 zone->spanned_pages = size;

4812 zone->present_pages = realsize;

4813 /*

4814 * Set an approximate value for lowmem here, it will be adjusted

4815 * when the bootmem allocator frees pages into the buddy system.

4816 * And all highmem pages will be managed by the buddy system.

4817 */

4818 zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;

4819 #ifdef CONFIG_NUMA

4820 zone->node = nid;

4821 zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)

4822 / 100;

4823 zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;

4824 #endif

4825 zone->name = zone_names[j];

4826 spin_lock_init(&zone->lock);

4827 spin_lock_init(&zone->lru_lock);

4828 zone_seqlock_init(zone);

4829 zone->zone_pgdat = pgdat;

4830

4831 zone_pcp_init(zone);

4832 lruvec_init(&zone->lruvec);

4833 if (!size)

4834 continue;

4835

4836 set_pageblock_order();

4837 setup_usemap(pgdat, zone, zone_start_pfn, size);

4838 ret = init_currently_empty_zone(zone, zone_start_pfn,

4839 size, MEMMAP_EARLY);

4840 BUG_ON(ret);

4841 memmap_init(size, nid, j, zone_start_pfn);

4842 zone_start_pfn += size;

4843 }

4844 }

至此Node的pgdat和Zone设置完毕。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: