您的位置:首页 > 其它

调用malloc时发生了什么(2) - sys_brk函数与VMA

2018-01-20 17:24 881 查看

调用malloc时发生了什么(2) - sys_brk函数

在上一篇中我们讲过,malloc实际调用了brk函数完成堆的分配,也了解了进程的内存布局。现在我们来看看,进程的内存布局到底是怎么实现的:具体堆指针是如何增加和减少的?增加减少对进程的影响是什么?一系列的问题等着我们去解决。

sys_brk函数(去掉了与本篇无关的代码)

SYSCALL_DEFINE1(brk, unsigned long, brk)
{
unsigned long retval;
unsigned long newbrk, oldbrk;
struct mm_struct *mm = current->mm;
struct vm_area_struct *next;
unsigned long min_brk;
bool populate;
LIST_HEAD(uf);

/* start_brk is the heap start address chosen at process creation; it never
 * changes during the process lifetime. It is not the same for every
 * process, though: it depends on whether address randomization is enabled
 * via /proc/sys/kernel/randomize_va_space. */
min_brk = mm->start_brk;

/* A requested brk below start_brk is an "abnormal" call: go straight to
 * out and return the current break address. The brk(0) query takes this
 * branch. */
if (brk < min_brk)
goto out;

/* Both values are page-aligned to ease mapping; mm->brk can be thought of
 * as end_brk, the current end of this process's heap. */
newbrk = PAGE_ALIGN(brk);
oldbrk = PAGE_ALIGN(mm->brk);
/* Range already mapped. Example: a first brk(0x10) maps the whole first
 * page (bytes 1-4096), so a subsequent brk(0x10+0x10) yields
 * oldbrk == newbrk. */
if (oldbrk == newbrk)
goto set_brk;

/* Always allow shrinking brk. */
/* Shrinking the heap (as a free() may trigger) satisfies this condition
 * and unmaps the released range. */
if (brk <= mm->brk) {
if (!do_munmap(mm, newbrk, oldbrk-newbrk, &uf))
goto set_brk;
goto out;
}

/* Reaching here means we need to expand the process heap. */
/* Check against existing mmap mappings. */
/* If an existing VMA sits right past oldbrk and would collide with the
 * grown heap, refuse. (NOTE(review): the original author reports never
 * having seen this path taken while debugging.) */
next = find_vma(mm, oldbrk);
if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
goto out;

/* Ok, looks good - let it rip. */
/* do_brk is the core of brk: it creates a VMA and inserts it into the
 * process-wide list. */
if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
goto out;

set_brk:
mm->brk = brk;
populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
up_write(&mm->mmap_sem);
userfaultfd_unmap_complete(mm, &uf);
if (populate)
mm_populate(oldbrk, newbrk - oldbrk);
/* Note: the raw brk argument is returned, not the page-aligned newbrk. */
return brk;

out:
retval = mm->brk;
up_write(&mm->mmap_sem);
return retval;
}


从上面可以看到,brk(addr)对应的内核函数是sys_brk。不考虑细节的话,sys_brk只是执行了mm->brk = brk,设置新的end_brk值。但如果要深入细节,就需要仔细分析do_brk函数了。

do_brk->do_brk_flags

函数较长,但是比较好理解。

/*
*  this is really a simplified "do_mmap".  it only handles
*  anonymous maps.  eventually we may be able to do some
*  brk-specific accounting here.
*/
static int **do_brk_flags**(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
unsigned long len;
struct rb_node **rb_link, *rb_parent;
pgoff_t pgoff = addr >> PAGE_SHIFT;
int error;
/*一些校验*/
len = PAGE_ALIGN(request);
if (len < request)
return -ENOMEM;
if (!len)
return 0;

/* Until we need other flags, refuse anything except VM_EXEC. */
if ((flags & (~VM_EXEC)) != 0)
return -EINVAL;
flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;

/*判断addr到addr+len的地址是否可用,主要检测是否内存不足、和mmap地址冲突,一般情况下都返回成功。这里不赘述*/
error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
if (offset_in_page(error))
return error;

error = mlock_future_check(mm, mm->def_flags, len);
if (error)
return error;

/*
* mm->mmap_sem is required to protect against another thread
* changing the mappings in case we sleep.
*/
verify_mm_writelocked(mm);

/*
* Clear old maps.  this also does some error checking for us
*/
while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
&rb_parent)) {
if (do_munmap(mm, addr, len, uf))
return -ENOMEM;
}

/* Check against address space limits *after* clearing old maps... */
if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
return -ENOMEM;

if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;

if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
return -ENOMEM;

/* Can we just expand an old private anonymous mapping? */
/*如果当前的start地址,是已存在的vma的结束地址,那么直接复用这个已存在的vma,不用新建一个vma,把当前vma的vm_end扩大即可。
如果当前的end地址,是已存在的vma的起始地址,那么复用,把当前的vma的start变小即可。
具体实现相当复杂,要考虑标志位,文件等,这里不展开了。
详细的见博客:http://edsionte.com/techblog/archives/3586
*/
vma = vma_merge(mm, prev, addr, addr + len, flags,
NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
if (vma)
goto out;

/*
* create a vma struct for an anonymous mapping
*/
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma) {
vm_unacct_memory(len >> PAGE_SHIFT);
return -ENOMEM;
}

INIT_LIST_HEAD(&vma->anon_vma_chain);
vma->vm_mm = mm;
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_pgoff = pgoff;
vma->vm_flags = flags;
vma->vm_page_prot = vm_get_page_prot(flags);
vma_link(mm, vma, prev, rb_link, rb_parent);
out:
perf_event_mmap(vma);
mm->total_vm += len >> PAGE_SHIFT;
mm->data_vm += len >> PAGE_SHIFT;
if (flags & VM_LOCKED)
mm->locked_vm += (len >> PAGE_SHIFT);
vma->vm_flags |= VM_SOFTDIRTY;
return 0;
}


上面的逻辑很简单,就是创建一个VMA,它是一个struct vm_area_struct对象。这个VMA有两个值很重要,就是vm_start和vm_end,记录着虚拟内存的起始地址和结束地址。

提一句,如果想看进程的VMA,可以在cat /proc/pid/maps下看,pid是具体的进程号。其内核实现函数是 show_map。

task_struct
{
...
struct mm_struct *mm;
...
}

/* The memory descriptor keeps all VMAs in a red-black tree rooted at mm_rb. */
struct mm_struct
{
	...
	struct rb_root mm_rb;	/* red-black tree of this process's VMAs */
	...
};


VMA被挂在mm_struct的mm_rb下,可以通过红黑树快速查找。

其次一个进程的所有VMA被用简单的链表链起来,方便遍历。

struct vm_area_struct

{



struct vm_area_struct *vm_next, *vm_prev;



}

我们回到主题。到现在,如果当前堆的结束地址是0x1000,执行了brk(0x2000)之后,就创建了一个vma,其起始地址是0x1000,结束地址是0x2000。由于malloc的实现是靠brk,我们姑且理解为,如果我们malloc(4096),内核只是简单地创建了一个vma并挂入链表,就这么简单。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: