您的位置:首页 > 运维架构 > Linux

linux内存模型之buddy(伙伴)系统二分配与释放

2012-03-08 22:33 513 查看
===================================

本文系本站原创,欢迎转载!

转载请注明出处:http://blog.csdn.net/gdt_A20

===================================

一、摘要

最重要的还是分配与释放,下面看一下相关的几个函数;

分配函数:

1.alloc_pages(gfp_t gfp_mask, unsigned int order);

用于请求2^order次方个连续的页,返回起始页的描述符;

2.alloc_page(gfp_mask);

请求分配一个单独的页,返回描述符;

3.__get_free_page(gfp_mask);

申请单独的页,但是返回它的线性地址;

4.__get_free_pages(gfp_mask,order);

用于请求2^order次方个连续的页,但是返回起始页的线性地址;

5.get_zeroed_page(gfp_mask);

请求一页,并且将该页清0,返回其线性地址;

6.__get_dma_pages(gfp_mask,order);

用于请求2^order次方个连续的dma页,返回起始页的线性地址(它展开后调用的是__get_free_pages,而不是返回页描述符);

释放函数:

1.free_pages(addr,order);

注意只有到count为0的时候才真正的释放掉;

2.free_page(addr);

3.__free_page(page);



二、下面具体的看一下这些函数.

include/linux/gfp.h

1.alloc_page(gfp_mask);

/* Single-page convenience wrapper: an order-0 alloc_pages() request. */
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)

2.__get_free_page(gfp_mask);

/* Single-page convenience wrapper: an order-0 __get_free_pages() request. */
#define __get_free_page(gfp_mask) \
		__get_free_pages((gfp_mask), 0)



3.__get_free_pages(gfp_mask,order);

mm/page_alloc.c

unsignedlong__get_free_pages(gfp_tgfp_mask,unsignedintorder)
{
	structpage*page;

	/*
	*__get_free_pages()returnsa32-bitaddress,whichcannotrepresent
	*ahighmempage
	*/
	VM_BUG_ON((gfp_mask&__GFP_HIGHMEM)!=0);

	page=alloc_pages(gfp_mask,order);
	if(!page)
		return0;
	return(unsignedlong)page_address(page);
}
4.get_zeroed_page(gfp_mask);
unsignedlongget_zeroed_page(gfp_tgfp_mask)
{
return__get_free_pages(gfp_mask|__GFP_ZERO,0);
}

5.__get_dma_pages(gfp_mask,order);

/* __get_free_pages() with GFP_DMA OR-ed into the caller's mask. */
#define __get_dma_pages(gfp_mask, order) \
		__get_free_pages((gfp_mask) | GFP_DMA, (order))

看来都是一路货色,都最后调用了alloc_pages(gfp_t gfp_mask, unsigned int order);

详细的看一下这个函数吧:

include/linux/gfp.h

/* Allocate pages on the current node (the node given by numa_node_id()). */
#define alloc_pages(gfp_mask, order) \
		alloc_pages_node(numa_node_id(), gfp_mask, order)

staticinlinestructpage*alloc_pages_node(intnid,gfp_tgfp_mask,
unsignedintorder)
{
/*Unknownnodeiscurrentnode*/
if(nid<0)
nid=numa_node_id();

return__alloc_pages(gfp_mask,order,node_zonelist(nid,gfp_mask));
}


staticinlinestructzonelist*node_zonelist(intnid,gfp_tflags)
{
returnNODE_DATA(nid)->node_zonelists+gfp_zonelist(flags);
}staticinlinestructpage*
__alloc_pages(gfp_tgfp_mask,unsignedintorder,
structzonelist*zonelist)
{
return__alloc_pages_nodemask(gfp_mask,order,zonelist,NULL);
}
structpage*
__alloc_pages_nodemask(gfp_tgfp_mask,unsignedintorder,
structzonelist*zonelist,nodemask_t*nodemask)
{
enumzone_typehigh_zoneidx=gfp_zone(gfp_mask);//根据mask选zone
structzone*preferred_zone;
structpage*page;
intmigratetype=allocflags_to_migratetype(gfp_mask);//选择一个类型的空闲表

gfp_mask&=gfp_allowed_mask;

lockdep_trace_alloc(gfp_mask);

might_sleep_if(gfp_mask&__GFP_WAIT);//是否可以睡眠

if(should_fail_alloc_page(gfp_mask,order))
returnNULL;

/*
*Checkthezonessuitableforthegfp_maskcontainatleastone
*validzone.It'spossibletohaveanemptyzonelistasaresult
*ofGFP_THISNODEandamemorylessnode
*/
if(unlikely(!zonelist->_zonerefs->zone))//如果没有管理区就返回了
returnNULL;

get_mems_allowed();
/*Thepreferredzoneisusedforstatisticslater*/
first_zones_zonelist(zonelist,high_zoneidx,//根据传入参数找到对应的zone进行分配
nodemask?:&cpuset_current_mems_allowed,
&preferred_zone);
if(!preferred_zone){//如果失败,返回
put_mems_allowed();
returnNULL;
}

/*Firstallocationattempt*/
page=get_page_from_freelist(gfp_mask|__GFP_HARDWALL,nodemask,order,//对应下面的slow这是一个快速的分配, zonelist,high_zoneidx,ALLOC_WMARK_LOW|ALLOC_CPUSET,//此时water线比较高也没有关系
preferred_zone,migratetype);
if(unlikely(!page))
page=__alloc_pages_slowpath(gfp_mask,order,//在water高的时候无法分配,可能需要降低一下water线,
zonelist,high_zoneidx,nodemask,//可能会启动页面回收进程进行页面回收
preferred_zone,migratetype);
put_mems_allowed();

trace_mm_page_alloc(page,order,gfp_mask,migratetype);
returnpage;
}对于该函数,kernel会根据gfp_mask标志走不同的路径,比如能睡眠情况,不能睡眠情况,高低端内存情况等等;

分配部分就到这里了,就不往下再贴了,大致过程就是这样;

下面看一下释放函数;

1.free_page(addr);

/* Single-page convenience wrapper: an order-0 free_pages() call. */
#define free_page(addr) free_pages((addr), 0)

2.__free_page(page);

/* Single-page convenience wrapper: an order-0 __free_pages() call. */
#define __free_page(page) __free_pages((page), 0)

/*
 * Drop one reference on @page; only when put_page_testzero() reports that
 * the count reached zero is the block actually freed.
 */
void __free_pages(struct page *page, unsigned int order)
{
	if (put_page_testzero(page)) {
		if (order == 0)
			free_hot_cold_page(page, 0);
		else
			__free_pages_ok(page, order);
	}
}

3.free_pages(addr,order);

free_page也会调用到这里,

/*
 * Address-based free: convert @addr to its struct page with
 * virt_to_page() and hand it to __free_pages().  An @addr of 0 is a no-op.
 */
void free_pages(unsigned long addr, unsigned int order)
{
	if (addr != 0) {
		VM_BUG_ON(!virt_addr_valid((void *)addr));
		__free_pages(virt_to_page((void *)addr), order);
	}
}

三个释放函数同样最后调用的相同的一个函数__free_pages,

/*
 * Common tail of all three release helpers.  Nothing is freed unless
 * put_page_testzero() reports that the reference count dropped to zero.
 */
void __free_pages(struct page *page, unsigned int order)
{
	if (put_page_testzero(page)) {
		if (order == 0)		/* single page: goes to the hot/cold page cache */
			free_hot_cold_page(page, 0);
		else			/* otherwise: released to the buddy system */
			__free_pages_ok(page, order);
	}
}

冷热页部分;

/*
*Freea0-orderpage
*cold==1?freeacoldpage:freeahotpage
*/
voidfree_hot_cold_page(structpage*page,intcold)
{
structzone*zone=page_zone(page);//page所在zone
structper_cpu_pages*pcp;
unsignedlongflags;
intmigratetype;
intwasMlocked=__TestClearPageMlocked(page);

if(!free_pages_prepare(page,0))
return;

migratetype=get_pageblock_migratetype(page);//page属于的链表
set_page_private(page,migratetype);
local_irq_save(flags);
if(unlikely(wasMlocked))
free_page_mlock(page);
__count_vm_event(PGFREE);

/*
*Weonlytrackunmovable,reclaimableandmovableonpcplists.
*FreeISOLATEpagesbacktotheallocatorbecausetheyarebeing
*offlinedbuttreatRESERVEasmovablepagessowecangetthose
*areasbackifnecessary.Otherwise,wemayhavetofree
*excessivelyintothepageallocator
*/
if(migratetype>=MIGRATE_PCPTYPES){//不是冷热页的内存
if(unlikely(migratetype==MIGRATE_ISOLATE)){
free_one_page(zone,page,0,migratetype);//释放到对应空闲链表
gotoout;
}
migratetype=MIGRATE_MOVABLE;
}

pcp=&this_cpu_ptr(zone->pageset)->pcp;//得到该cpu冷热也结构
if(cold)
list_add_tail(&page->lru,&pcp->lists[migratetype]);//冷页加入冷链表
else
list_add(&page->lru,&pcp->lists[migratetype]);//热页加入热链表
pcp->count++;
if(pcp->count>=pcp->high){//冷热页太多了,超了就释放点到buddy中
free_pcppages_bulk(zone,pcp->batch,pcp);
pcp->count-=pcp->batch;
}

out:
local_irq_restore(flags);
}

释放到伙伴系统中部分;

taticvoid__free_pages_ok(structpage*page,unsignedintorder)
{
unsignedlongflags;
intwasMlocked=__TestClearPageMlocked(page);

if(!free_pages_prepare(page,order))
return;

local_irq_save(flags);
if(unlikely(wasMlocked))
free_page_mlock(page);
__count_vm_events(PGFREE,1<<order);
free_one_page(page_zone(page),page,order,
get_pageblock_migratetype(page));
local_irq_restore(flags);
}

staticvoidfree_one_page(structzone*zone,structpage*page,intorder,
intmigratetype)
{
spin_lock(&zone->lock);
zone->all_unreclaimable=0;
zone->pages_scanned=0;

__free_one_page(page,zone,order,migratetype);
__mod_zone_page_state(zone,NR_FREE_PAGES,1<<order);
spin_unlock(&zone->lock);
}


最后会调用到__free_one_page这里,
参数依次为:要释放的页(page)、页所在的zone、被释放内存块的order(块大小为2^order页)、要挂入的空闲链表的迁移类型(migratetype),

staticinlinevoid__free_one_page(structpage*page,
structzone*zone,unsignedintorder,
intmigratetype)
{
unsignedlongpage_idx;
unsignedlongcombined_idx;
unsignedlonguninitialized_var(buddy_idx);
structpage*buddy;

if(unlikely(PageCompound(page)))
if(unlikely(destroy_compound_page(page,order)))
return;

VM_BUG_ON(migratetype==-1);

page_idx=page_to_pfn(page)&((1<<MAX_ORDER)-1);//页号

VM_BUG_ON(page_idx&((1<<order)-1));
VM_BUG_ON(bad_range(zone,page));

while(order<MAX_ORDER-1){//2^3,order代表3
buddy_idx=__find_buddy_index(page_idx,order);//找朋友,找伙伴^.^!,找到在伙伴的位置,或者前或者后
buddy=page+(buddy_idx-page_idx);//找到伙伴的下标
if(!page_is_buddy(page,buddy,order))//不满足合并条件那么退出
break;

/*Ourbuddyisfree,mergewithitandmoveuponeorder.*/
list_del(&buddy->lru);//我们的伙伴很清闲,摘除他
zone->free_area[order].nr_free--;//将对应zone区域的order伙伴的空闲链表-1
rmv_page_order(buddy);//clearbuddy标志,

combined_idx=buddy_idx&page_idx;//得到一个新的下标
page=page+(combined_idx-page_idx);//得到新下标page,
page_idx=combined_idx;//更新新下标标号
order++;//以新下标为基础找更大的伙伴进行合并
}
set_page_order(page,order);
//跳出的时候:可能是不满足伙伴,另外可能是循环到达重点
/*
*Ifthisisnotthelargestpossiblepage,checkifthebuddy
*ofthenext-highestorderisfree.Ifitis,it'spossible
*thatpagesarebeingfreedthatwillcoalescesoon.Incase,
*thatishappening,addthefreepagetothetailofthelist
*soit'slesslikelytobeusedsoonandmorelikelytobemerged
*asahigherorderpage
*/
if((order<MAX_ORDER-2)&&pfn_valid_within(page_to_pfn(buddy))){//去除循环到达重点的情况,关注伙伴不满足的情况
structpage*higher_page,*higher_buddy;
combined_idx=buddy_idx&page_idx;//得到一个新下标
higher_page=page+(combined_idx-page_idx);//得到新下标对应的页
buddy_idx=__find_buddy_index(combined_idx,order+1);//得到order大一的伙伴
higher_buddy=page+(buddy_idx-combined_idx);//以新下标得到order大一的伙伴地址
if(page_is_buddy(higher_page,higher_buddy,order+1)){//判断新下标和order+1是否满足伙伴合并条件
list_add_tail(&page->lru,
&zone->free_area[order].free_list[migratetype]);//如果是把它加入对应链表,并且退出
gotoout;
}
}

list_add(&page->lru,&zone->free_area[order].free_list[migratetype]);//如果不满足伙伴条件并且无法再合并那么加入对应order的空闲链表
out:
zone->free_area[order].nr_free++;
}
结果:对应页找到最大的伙伴合并进去

/*
 * Locate the struct page for both the matching buddy in our
 * pair (buddy1) and the combined O(n+1) page they form (page).
 *
 * 1) Any buddy B1 will have an order O twin B2 which satisfies
 * the following equation:
 *     B2 = B1 ^ (1 << O)
 * For example, if the starting buddy (buddy2) is #8 its order
 * 1 buddy is #10:
 *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
 *
 * 2) Any buddy B will have an order O+1 parent P which
 * satisfies the following equation:
 *     P = B & ~(1 << O)
 *
 * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
 */
static inline unsigned long
__find_buddy_index(unsigned long page_idx, unsigned int order)
{
	/* Flip bit 'order' of the index to get the buddy's index. */
	return page_idx ^ (1 << order);
}
/* Computing the buddy: with order 0 and page_idx == 0 the buddy is 0 ^ 1 == 1. */

//如果order是4,page_idx==0,那么伙伴就是0^(1<<4)==16,

符合2^4==16

//总之是以page_idx为起始,2^order为大小的伙伴,返回该伙伴的标号

//如果page_idx==5,order为0,那么5^(1<<0)==4前移一位

//如果page_idx==5,order为1,那么5^(1<<1)==7,

//如果page_idx==5,order为2,那么5^(1<<2)==1,移动到前面的伙伴

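总结:返回前面还是后面的伙伴,与order的奇偶无关,只取决于page_idx的第order位:

当(page_idx&(1<<order))==0时,返回后面的伙伴,即page_idx+(1<<order)(page_idx<(1<<order)是其中的一种特例);

当(page_idx&(1<<order))!=0时,返回前面的伙伴,即page_idx-(1<<order).例如page_idx==5(二进制101):order为0和2时对应位为1,伙伴在前(4和1);order为1时对应位为0,伙伴在后(7).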

####找到伙伴还要确定是否为可以合并
/*
 * This function checks whether a page is free && is the buddy
 * we can do coalesce a page and its buddy if
 * (a) the buddy is not in a hole &&
 * (b) the buddy is in the buddy system &&
 * (c) a page and its buddy have the same order &&
 * (d) a page and its buddy are in the same zone.
 *
 * For recording whether a page is in the buddy system, we set ->_mapcount -2.
 * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock.
 *
 * For recording page's order, we use page_private(page).
 *
 * Returns 1 if @buddy can be merged with @page at @order, 0 otherwise.
 */
static inline int page_is_buddy(struct page *page, struct page *buddy,
								int order)
{
	/* The buddy must be real, usable memory and not a hole. */
	if (!pfn_valid_within(page_to_pfn(buddy)))
		return 0;

	/* The page and its buddy must be in the same zone. */
	if (page_zone_id(page) != page_zone_id(buddy))
		return 0;

	/* The buddy must be in the buddy system with the same order. */
	if (PageBuddy(buddy) && page_order(buddy) == order) {
		VM_BUG_ON(page_count(buddy) != 0);
		return 1;
	}
	return 0;
}
####由此可以看出合并为伙伴的前提条件:

1.不是空洞

2.相同的zone

3.order一样

4.伙伴页本身在buddy系统中(PageBuddy(buddy)为真,即伙伴处于空闲状态)

三、总结

buddy的分配释放函数就到这里了.

Thanks
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: