
Linux memory model: the buddy system (1), overview and related data structures

2012-03-07 00:28
==================================

This article is original to this site; reposting is welcome!

When reposting, please credit the source: http://blog.csdn.net/gdt_A20

==================================

I. Overview

The bootmem allocator discussed earlier is only a mechanism for early, boot-time memory allocation; the allocator that Linux ultimately relies on at the lowest level is modeled on the buddy system. Its basic principle is to treat the page as the minimum unit and to hand out blocks of 2^0, 2^1, 2^2, ..., 2^n pages (with n < MAX_ORDER, which defaults to 11, so the largest block is 2^10 pages), splitting larger blocks on allocation and merging freed buddies back together. Allocating in power-of-two blocks keeps external fragmentation under control to a useful degree.
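To make the power-of-two scheme concrete, here is a minimal user-space sketch (not kernel code) of the two calculations the buddy allocator is built on: turning a request size into an order, and locating a block's buddy by flipping one bit of its page frame number:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Smallest order whose block of 2^order pages covers 'bytes'.
 * The kernel caps the order at MAX_ORDER - 1 (10 by default). */
static unsigned int size_to_order(unsigned long bytes)
{
	unsigned long pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned int order = 0;

	while ((1UL << order) < pages)
		order++;
	return order;
}

/* The buddy of the block starting at 'pfn' is found by flipping bit 'order'. */
static unsigned long buddy_pfn(unsigned long pfn, unsigned int order)
{
	return pfn ^ (1UL << order);
}

int main(void)
{
	printf("16 KB -> order %u\n", size_to_order(16 * 1024));       /* order 2 */
	printf("buddy of pfn 8 at order 2 -> %lu\n", buddy_pfn(8, 2)); /* pfn 12  */
	return 0;
}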

The structure built on top of pages is the zone. Because of constraints tied to a page's physical address (physical memory may exceed what the kernel can map directly, DMA hardware can often address only a limited range, and so on), the kernel groups pages into different zones. Common zones include:

ZONE_DMA --- pages usable for DMA operations;

ZONE_NORMAL --- directly mapped pages, for example the well-known first 896 MB on 32-bit x86;

ZONE_HIGHMEM --- high memory, pages not permanently mapped into the kernel address space.
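As a rough illustration of where these zone boundaries fall on a 32-bit x86 machine with the usual defaults (16 MB for DMA, 896 MB of directly mapped low memory; other architectures differ), a hypothetical classifier would look like this:

#include <stdio.h>

#define DMA_LIMIT      (16UL << 20)   /* 16 MB, typical x86 ZONE_DMA limit    */
#define LOWMEM_LIMIT   (896UL << 20)  /* 896 MB of directly mapped low memory */

static const char *zone_of(unsigned long phys_addr)
{
	if (phys_addr < DMA_LIMIT)
		return "ZONE_DMA";
	if (phys_addr < LOWMEM_LIMIT)
		return "ZONE_NORMAL";
	return "ZONE_HIGHMEM";
}

int main(void)
{
	printf("0x00100000 -> %s\n", zone_of(0x00100000UL)); /* 1 MB   -> ZONE_DMA     */
	printf("0x10000000 -> %s\n", zone_of(0x10000000UL)); /* 256 MB -> ZONE_NORMAL  */
	printf("0x40000000 -> %s\n", zone_of(0x40000000UL)); /* 1 GB   -> ZONE_HIGHMEM */
	return 0;
}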

Per-CPU (pcp) hot and cold page caches

The buddy layer also has the notion of hot and cold page caches, implemented through the zone's pageset member. A hot page is one that is likely still resident in the CPU cache, so its data can be accessed faster; a cold page is one that is not in the CPU cache.
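For reference, pageset points at a per-CPU structure that in kernels of this era looks roughly like the following (the exact field set varies between versions); the hot/cold distinction shows up in whether a freed page is added to the head or the tail of these lists:

struct per_cpu_pages {
	int count;		/* number of pages on the lists */
	int high;		/* high watermark; drain back to the buddy lists above this */
	int batch;		/* chunk size for refilling from / returning to the buddy lists */

	/* one list of pages per migrate type */
	struct list_head lists[MIGRATE_PCPTYPES];
};

struct per_cpu_pageset {
	struct per_cpu_pages pcp;
	/* per-CPU vm statistics fields omitted */
};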

The topic is planned in three parts:

1. Related data structures

2. Allocation and freeing

3. The transition from bootmem to buddy

II. Related data structures

1. A page of physical memory (for example 4 KB) is described in the kernel by a struct page, defined in include/linux/mm_types.h:

struct page {
	unsigned long flags;		/* page status flags */
	struct address_space *mapping;	/* address_space this page belongs to */
	struct {
		union {
			pgoff_t index;		
			void *freelist;		
		};
		union {
			unsigned long counters;   /* overlays the reference counts below */
			struct {
				union {
					atomic_t _mapcount;    /* count of page table mappings of this page */
					struct {
						unsigned inuse:16;
						unsigned objects:15;
						unsigned frozen:1;
					};
				};
				atomic_t _count;		/* Usage count, see below. */
			};
		};
	};

	/* Third double word block */
	union {
		struct list_head lru;	/* Pageout list, eg. active_list   
					 * protected by zone->lru_lock !
					 */
		struct {		/* slub per cpu partial pages */
			struct page *next;	/* Next partial slab */
#ifdef CONFIG_64BIT
			int pages;	/* Nr of partial slabs left */
			int pobjects;	/* Approximate # of objects */
#else
			short int pages;
			short int pobjects;
#endif
		};
	};

	/* Remainder is not double word aligned */
	union {
		unsigned long private;		
#if USE_SPLIT_PTLOCKS
		spinlock_t ptl;
#endif
		struct kmem_cache *slab;	/* SLUB: Pointer to slab */
		struct page *first_page;	/* Compound tail pages */
	};

	
#if defined(WANT_PAGE_VIRTUAL)
	void *virtual;			/* Kernel virtual address (NULL if
					   not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
	unsigned long debug_flags;	/* Use atomic bitops on this */
#endif

#ifdef CONFIG_KMEMCHECK
	
	void *shadow;
#endif
};
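Every page frame has exactly one such descriptor, stored in a flat array (mem_map in the flat-memory model), so descriptors and page frame numbers convert into each other by plain pointer arithmetic. A minimal user-space sketch of the idea (not the kernel's actual macros, which also handle sparse and discontiguous layouts):

#include <stdio.h>

struct page { unsigned long flags; /* other fields omitted */ };

#define ARCH_PFN_OFFSET 0UL	/* first valid pfn; 0 here for simplicity */

static struct page mem_map[1024];	/* toy mem_map covering 1024 page frames */

/* FLATMEM-style conversion: the array index is the pfn minus the offset. */
static unsigned long page_to_pfn(struct page *page)
{
	return (unsigned long)(page - mem_map) + ARCH_PFN_OFFSET;
}

static struct page *pfn_to_page(unsigned long pfn)
{
	return mem_map + (pfn - ARCH_PFN_OFFSET);
}

int main(void)
{
	struct page *p = pfn_to_page(42);
	printf("pfn 42 -> descriptor index %lu\n", page_to_pfn(p)); /* prints 42 */
	return 0;
}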
2. Each zone is described by its own structure, struct zone, defined in include/linux/mmzone.h:

struct zone {
	/* Fields commonly accessed by the page allocator */

	/* zone watermarks, access with *_wmark_pages(zone) macros */
	unsigned long watermark[NR_WMARK];

	/*
	 * When free pages are below this point, additional steps are taken
	 * when reading the number of free pages to avoid per-cpu counter
	 * drift allowing watermarks to be breached
	 */
	unsigned long percpu_drift_mark;

	/*
	 * We don't know if the memory that we're going to allocate will be freeable
	 * or/and it will be released eventually, so to avoid totally wasting several
	 * GB of ram we must reserve some of the lower zone memory (otherwise we risk
	 * to run OOM on the lower zones despite there's tons of freeable ram
	 * on the higher zones). This array is recalculated at runtime if the
	 * sysctl_lowmem_reserve_ratio sysctl changes.
	 */
	unsigned long		lowmem_reserve[MAX_NR_ZONES];

#ifdef CONFIG_NUMA
	int node;
	/*
	 * zone reclaim becomes active if more unmapped pages exist.
	 */
	unsigned long		min_unmapped_pages;
	unsigned long		min_slab_pages;
#endif
	struct per_cpu_pageset __percpu *pageset;
	/*
	 * free areas of different sizes
	 */
	spinlock_t		lock;
	int                     all_unreclaimable; /* All pages pinned */
#ifdef CONFIG_MEMORY_HOTPLUG
	/* see spanned/present_pages for more description */
	seqlock_t		span_seqlock;
#endif
	struct free_area	free_area[MAX_ORDER];      /* per-order free lists for the buddy allocator */

#ifndef CONFIG_SPARSEMEM
	/*
	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
	 * In SPARSEMEM, this map is stored in struct mem_section
	 */
	unsigned long		*pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

#ifdef CONFIG_COMPACTION
	/*
	 * On compaction failure, 1<<compact_defer_shift compactions
	 * are skipped before trying again. The number attempted since
	 * last failure is tracked with compact_considered.
	 */
	unsigned int		compact_considered;
	unsigned int		compact_defer_shift;
#endif

	ZONE_PADDING(_pad1_)

	/* Fields commonly accessed by the page reclaim scanner */
	spinlock_t		lru_lock;	
	struct zone_lru {
		struct list_head list;
	} lru[NR_LRU_LISTS];

	struct zone_reclaim_stat reclaim_stat;

	unsigned long		pages_scanned;	   /* since last reclaim */
	unsigned long		flags;		   /* zone flags, see below */

	/* Zone statistics */
	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];

	/*
	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
	 * this zone's LRU.  Maintained by the pageout code.
	 */
	unsigned int inactive_ratio;

	ZONE_PADDING(_pad2_)
	/* Rarely used or read-mostly fields */

	/*
	 * wait_table		-- the array holding the hash table
	 * wait_table_hash_nr_entries	-- the size of the hash table array
	 * wait_table_bits	-- wait_table_size == (1 << wait_table_bits)
	 */
	wait_queue_head_t	* wait_table;
	unsigned long		wait_table_hash_nr_entries;
	unsigned long		wait_table_bits;

	/*
	 * Discontig memory support fields.
	 */
	struct pglist_data	*zone_pgdat;     /* the node (pg_data_t) this zone belongs to */
	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
	unsigned long		zone_start_pfn;  /* first page frame number of this zone */

	unsigned long		spanned_pages;	/* total size, including holes */
	unsigned long		present_pages;	/* amount of memory (excluding holes) */

	/*
	 * rarely used fields:
	 */
	const char		*name;          /* zone name: "DMA", "Normal", "HighMem", ... */
} ____cacheline_internodealigned_in_smp;
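The free_area[MAX_ORDER] array above is the core of the buddy allocator: element free_area[order] tracks the free blocks of 2^order pages. In this era of the kernel it is defined, also in include/linux/mmzone.h, roughly as:

struct free_area {
	struct list_head	free_list[MIGRATE_TYPES];  /* free blocks of this order, one list per migrate type */
	unsigned long		nr_free;                   /* number of free blocks of this order */
};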

3. Node bootmem data

Each memory node's pg_data_t keeps a pointer (bdata) to the bootmem bookkeeping structure defined in include/linux/bootmem.h:

  typedef struct bootmem_data {
        unsigned long node_min_pfn;   /* lowest page frame number of this node */
        unsigned long node_low_pfn;   /* highest page frame number (end of low memory) */
        void *node_bootmem_map;       /* pointer to the bootmem bitmap */
        unsigned long last_end_off;   /* end offset of the last allocation, for fast follow-up allocations */
        unsigned long hint_idx;       /* search hint for the next allocation */
        struct list_head list;        /* links to the next node's bootmem_data */
  } bootmem_data_t;
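node_bootmem_map points to a bitmap with one bit per page frame between node_min_pfn and node_low_pfn; a set bit marks the frame as reserved. A minimal sketch of the indexing (illustrative only, not the kernel's own helpers):

#include <stdio.h>

/* One bit per page frame, starting at node_min_pfn; 1 = reserved, 0 = free. */
static int bootmem_test_bit(const unsigned char *bitmap,
			    unsigned long node_min_pfn, unsigned long pfn)
{
	unsigned long idx = pfn - node_min_pfn;

	return (bitmap[idx / 8] >> (idx % 8)) & 1;
}

int main(void)
{
	unsigned char bitmap[4] = { 0x01, 0x00, 0x00, 0x00 }; /* only the first frame reserved */

	printf("pfn 100 reserved? %d\n", bootmem_test_bit(bitmap, 100, 100)); /* 1 */
	printf("pfn 101 reserved? %d\n", bootmem_test_bit(bitmap, 100, 101)); /* 0 */
	return 0;
}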


4. The global mem_map array

The array of struct page descriptors for a node is set up along this call path:

setup_arch --> paging_init --> bootmem_init --> arm_bootmem_free --> free_area_init_node --> alloc_node_mem_map

Inside alloc_node_mem_map(), size = (end - start) * sizeof(struct page) is computed, memory for every page descriptor of the node is obtained with map = alloc_remap(pgdat->node_id, size) (falling back to the bootmem allocator if that fails), and the result is recorded in pgdat->node_mem_map:
static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
{
	/* Skip empty nodes */
	if (!pgdat->node_spanned_pages)
		return;

#ifdef CONFIG_FLAT_NODE_MEM_MAP
	/* ia64 gets its own node_mem_map, before this, without bootmem */
	if (!pgdat->node_mem_map) {
		unsigned long size, start, end;
		struct page *map;

		/*
		 * The zone's endpoints aren't required to be MAX_ORDER
		 * aligned but the node_mem_map endpoints must be in order
		 * for the buddy allocator to function correctly.
		 */
		start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
		end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
		end = ALIGN(end, MAX_ORDER_NR_PAGES);
		size =  (end - start) * sizeof(struct page);
		map = alloc_remap(pgdat->node_id, size);
		if (!map)
			map = alloc_bootmem_node_nopanic(pgdat, size);  
		pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
	}
#ifndef CONFIG_NEED_MULTIPLE_NODES
	/*
	 * With no DISCONTIG, the global mem_map is just set as node 0's
	 */
	if (pgdat == NODE_DATA(0)) {
		mem_map = NODE_DATA(0)->node_mem_map;
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
		if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
			mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
	}
#endif
#endif /* CONFIG_FLAT_NODE_MEM_MAP */
}
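As a worked example of the size computed above, assuming 4 KB pages and a struct page of about 32 bytes (it is roughly 64 bytes on 64-bit kernels):

#include <stdio.h>

int main(void)
{
	unsigned long node_bytes = 512UL << 20;	/* a node spanning 512 MB      */
	unsigned long page_size  = 4096;	/* 4 KB pages                  */
	unsigned long desc_size  = 32;		/* assumed sizeof(struct page) */
	unsigned long nr_pages   = node_bytes / page_size;

	/* roughly what alloc_node_mem_map() computes as 'size' */
	printf("%lu descriptors, %lu KB of node_mem_map\n",
	       nr_pages, nr_pages * desc_size / 1024);	/* 131072 descriptors, 4096 KB */
	return 0;
}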


III. Summary: a brief survey of the data structures the buddy allocator uses; allocation and freeing, and the transition from bootmem to buddy, will be covered in the following parts.