
Anonymous Mapping of User-Space Memory Regions in Linux

2014-01-09 18:01

1

When invoking the mmap system call, the following flags can be specified:

#define MAP_SHARED	0x01		/* Share changes */
#define MAP_PRIVATE	0x02		/* Changes are private */
#define MAP_TYPE	0x0f		/* Mask for type of mapping */
#define MAP_FIXED	0x10		/* Interpret addr exactly */
#define MAP_ANONYMOUS	0x20		/* don't use a file */
#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
# define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be uninitialized */
#else
# define MAP_UNINITIALIZED 0x0		/* Don't support this flag */
#endif


MAP_SHARED

Used to share access to a file among multiple processes; writes to the region are visible to the other processes mapping the same file.

MAP_PRIVATE

Used to create a private mapping detached from its data source; writes to the region do not affect the contents of the source file.

MAP_FIXED

Used to create the mapping at exactly the specified linear address, with no adjustment to any other address allowed.

MAP_ANONYMOUS

Used to create a mapping that is not associated with any file; in other words, a mapping with no data source.

For such mappings, do_anonymous_page calls alloc_zeroed_user_highpage_movable to allocate a memory page initialized to all zeros.
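A minimal user-space sketch (assuming a Linux system; this program is not from the original article) that shows both properties at once: the mapping needs no backing file, and the first read confirms the page arrives zero-filled via the do_anonymous_page path:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4096;

	/* Anonymous private mapping: no backing file, fd is -1, offset is 0. */
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* The first touch faults in a zero-filled page
	 * (do_anonymous_page -> alloc_zeroed_user_highpage_movable). */
	printf("first byte: %d\n", p[0]);	/* prints 0 */

	strcpy(p, "hello");
	printf("%s\n", p);

	munmap(p, len);
	return 0;
}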

2

In the definition of the vm_area_struct data structure there is a doubly-linked list node, anon_vma_chain:

struct vm_area_struct {
	......
	/*
	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
	 * list, after a COW of one of the file pages.  A MAP_SHARED vma
	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
	 * or brk vma (with NULL file) can only be in an anon_vma list.
	 */
	struct list_head anon_vma_chain; /* Serialized by mmap_sem &
					  * page_table_lock */
	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */
	......
};

where struct anon_vma is defined as:

/*
 * The anon_vma heads a list of private "related" vmas, to scan if
 * an anonymous page pointing to this anon_vma needs to be unmapped:
 * the vmas on the list will be related by forking, or by splitting.
 *
 * Since vmas come and go as they are split and merged (particularly
 * in mprotect), the mapping field of an anonymous page cannot point
 * directly to a vma: instead it points to an anon_vma, on whose list
 * the related vmas can be easily linked or unlinked.
 *
 * After unlinking the last vma on the list, we must garbage collect
 * the anon_vma object itself: we're guaranteed no page can be
 * pointing to this anon_vma once its vma list is empty.
 */
struct anon_vma {
	struct anon_vma *root;	/* Root of this anon_vma tree */
	struct mutex mutex;	/* Serialize access to vma list */
	/*
	 * The refcount is taken on an anon_vma when there is no
	 * guarantee that the vma of page tables will exist for
	 * the duration of the operation. A caller that takes
	 * the reference is responsible for clearing up the
	 * anon_vma if they are the last user on release
	 */
	atomic_t refcount;
	/*
	 * NOTE: the LSB of the head.next is set by
	 * mm_take_all_locks() _after_ taking the above lock. So the
	 * head must only be read/written after taking the above lock
	 * to be sure to see a valid next pointer. The LSB bit itself
	 * is serialized by a system wide lock only visible to
	 * mm_take_all_locks() (mm_all_locks_mutex).
	 */
	struct list_head head;	/* Chain of private "related" vmas */
};
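To make these pointer relationships concrete, here is a stand-alone toy model in plain C (not kernel code; all toy_* names are invented for illustration). It demonstrates the point made in the comment above: an anonymous page records an anon_vma, never a vma, and the related vmas hang off the anon_vma's list so they can be split, merged, linked, and unlinked freely:

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

/* Toy stand-in for struct anon_vma: just the list head. */
struct toy_anon_vma {
	struct list_head head;		/* chain of "related" vmas */
};

/* Toy stand-in for struct vm_area_struct. */
struct toy_vma {
	struct list_head anon_vma_chain; /* node on toy_anon_vma.head */
	struct toy_anon_vma *anon_vma;
};

/* Toy anonymous page: mapping points at the anon_vma, never at a vma. */
struct toy_page {
	struct toy_anon_vma *mapping;
};

static void list_add(struct list_head *item, struct list_head *head)
{
	item->next = head->next;
	item->prev = head;
	head->next->prev = item;
	head->next = item;
}

int main(void)
{
	struct toy_anon_vma av = { .head = { &av.head, &av.head } };
	struct toy_vma parent, child;	/* e.g. related by fork() */
	struct toy_page page = { .mapping = &av };

	parent.anon_vma = child.anon_vma = &av;
	list_add(&parent.anon_vma_chain, &av.head);
	list_add(&child.anon_vma_chain, &av.head);

	/* To unmap `page`, rmap walks av.head and visits every related
	 * vma; the page itself never has to know which vmas exist. */
	int n = 0;
	for (struct list_head *pos = av.head.next; pos != &av.head; pos = pos->next)
		n++;
	printf("vmas reachable from the page's anon_vma: %d\n", n);	/* 2 */
	return 0;
}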

3

do_mmap

static inline unsigned long do_mmap(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long offset)
{
	unsigned long ret = -EINVAL;

	if ((offset + PAGE_ALIGN(len)) < offset)
		goto out;
	if (!(offset & ~PAGE_MASK))
		ret = do_mmap_pgoff(file, addr, len, prot, flag,
				    offset >> PAGE_SHIFT);
out:
	return ret;
}


if ((offset + PAGE_ALIGN(len)) < offset)



/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)



/*
 * 'kernel.h' contains some often-used function prototypes etc
 */
#define ALIGN(x, a)			__ALIGN_KERNEL((x), (a))
#define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))

Expanding these definitions, the check becomes:

if ((offset + (((len) + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1))) < offset)


This means that if len is too large, rounding it up to a page boundary and adding it to offset overflows; there is not enough linear address space to map it, so the call fails.
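A tiny self-contained demonstration of the wrap-around this check catches (PAGE_SIZE is hard-coded to 4096 here purely for illustration):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long offset = 0x100000;
	unsigned long len = (unsigned long)-PAGE_SIZE;	/* absurdly large */

	/* offset + PAGE_ALIGN(len) wraps past 0 and lands below offset,
	 * which is exactly what do_mmap's first check rejects. */
	printf("wrapped: %d\n", (offset + PAGE_ALIGN(len)) < offset);	/* 1 */
	return 0;
}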



if (!(offset & ~PAGE_MASK))


If offset lies exactly on a page boundary (its low PAGE_SHIFT bits are all zero), the operation proceeds:


ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);


The final argument is the page index of the mapped region within the file: the byte offset converted to pages by offset >> PAGE_SHIFT.
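For example (a hedged sketch; /tmp/example.dat is a made-up path, assumed to exist and to be at least three pages long), mapping a file starting two pages in hands the kernel pgoff = 2:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/example.dat", O_RDONLY);	/* hypothetical file */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Byte offset 8192 = two 4 KiB pages, so do_mmap passes
	 * pgoff = 8192 >> PAGE_SHIFT = 2 down to do_mmap_pgoff. */
	char *p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 8192);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	printf("third page begins with: 0x%02x\n", (unsigned char)p[0]);
	munmap(p, 4096);
	close(fd);
	return 0;
}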

/*
 * The caller must hold down_write(&current->mm->mmap_sem).
 */
unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff)
{
	struct mm_struct *mm = current->mm;
	struct inode *inode;
	vm_flags_t vm_flags;
	int error;
	unsigned long reqprot = prot;

	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC?
	 *
	 * (the exception is when the underlying filesystem is noexec
	 *  mounted, in which case we dont add PROT_EXEC.)
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
			prot |= PROT_EXEC;

	if (!len)
		return -EINVAL;

	if (!(flags & MAP_FIXED))
		addr = round_hint_to_min(addr);

	/* Careful about overflows.. */
	len = PAGE_ALIGN(len);
	if (!len)
		return -ENOMEM;

	/* offset overflow? */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	/* Too many mappings? */
	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	/* Obtain the address to map to. we verify (or select) it and ensure
	 * that it represents a valid section of the address space.
	 */
	addr = get_unmapped_area(file, addr, len, pgoff, flags);
	if (addr & ~PAGE_MASK)
		return addr;

	/* Do simple checking here so the lower-level routines won't have
	 * to. we assume access permissions have been handled by the open
	 * of the memory object, so we don't do any here.
	 */
	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	if (flags & MAP_LOCKED)
		if (!can_do_mlock())
			return -EPERM;

	/* mlock MCL_FUTURE? */
	if (vm_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;
		locked = len >> PAGE_SHIFT;
		locked += mm->locked_vm;
		lock_limit = rlimit(RLIMIT_MEMLOCK);
		lock_limit >>= PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}

	inode = file ? file->f_path.dentry->d_inode : NULL;

	if (file) {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
				return -EACCES;

			/*
			 * Make sure we don't allow writing to an append-only
			 * file..
			 */
			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/*
			 * Make sure there are no mandatory locks on the file.
			 */
			if (locks_verify_locked(inode))
				return -EAGAIN;

			vm_flags |= VM_SHARED | VM_MAYSHARE;
			if (!(file->f_mode & FMODE_WRITE))
				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

			/* fall through */
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
				if (vm_flags & VM_EXEC)
					return -EPERM;
				vm_flags &= ~VM_MAYEXEC;
			}

			if (!file->f_op || !file->f_op->mmap)
				return -ENODEV;
			break;

		default:
			return -EINVAL;
		}
	} else {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/*
			 * Ignore pgoff.
			 */
			pgoff = 0;
			vm_flags |= VM_SHARED | VM_MAYSHARE;
			break;
		case MAP_PRIVATE:
			/*
			 * Set pgoff according to addr for anon_vma.
			 */
			pgoff = addr >> PAGE_SHIFT;
			break;
		default:
			return -EINVAL;
		}
	}

	error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
	if (error)
		return error;

	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
}
EXPORT_SYMBOL(do_mmap_pgoff);


/* Obtain the address to map to. we verify (or select) it and ensure
 * that it represents a valid section of the address space.
 */
addr = get_unmapped_area(file, addr, len, pgoff, flags);
if (addr & ~PAGE_MASK)
	return addr;


The get_unmapped_area function searches for a linear address range large enough to hold the requested vma and returns the start address of that range. On failure it instead returns a negative error code; since error values such as -ENOMEM are never page-aligned, the check addr & ~PAGE_MASK detects them, and the function bails out by returning that value directly.
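A quick demonstration of how an error code fails the alignment test (values hard-coded for a 64-bit machine with 4 KiB pages; -ENOMEM is -12):

#include <stdio.h>

#define PAGE_MASK	(~4095UL)

int main(void)
{
	unsigned long ok  = 0x7f0000000000UL;	/* page-aligned address */
	unsigned long err = (unsigned long)-12;	/* -ENOMEM encoded */

	printf("ok  looks like an error: %d\n", (ok  & ~PAGE_MASK) != 0);	/* 0 */
	printf("err looks like an error: %d\n", (err & ~PAGE_MASK) != 0);	/* 1 */
	return 0;
}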

But a question arises: if we just return here, might the caller skip checking the result, assume the kernel has completed the mmap, and then try to read or write a region that has not actually been associated with the file? What unpredictable things would happen then?

[Following the idea in /article/6526483.html: a Page Fault is triggered only when the process actually accesses the page, so the key at this stage is to set up the appropriate Page Fault handler and the corresponding struct pointer members.]
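A small user-space experiment (assuming Linux's mincore(2)) that makes this lazy behavior visible: the anonymous page only becomes resident once the first access faults it in through do_anonymous_page:

#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	unsigned char vec[1];

	char *p = mmap(NULL, page, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* mincore() reports whether each page is resident in RAM. */
	mincore(p, page, vec);
	printf("resident before first touch: %d\n", vec[0] & 1);	/* 0 */

	p[0] = 42;	/* first touch -> page fault -> do_anonymous_page */

	mincore(p, page, vec);
	printf("resident after first touch:  %d\n", vec[0] & 1);	/* 1 */

	munmap(p, page);
	return 0;
}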