Linux用户空间内存区域的匿名映射
2014-01-09 18:01
369 查看
1
在调用mmap系统调用时,可以指定的标志(flag)参数:
#define MAP_SHARED	0x01		/* Share changes */
/* mmap() flag bits; name and value were fused by extraction, restored here. */
#define MAP_PRIVATE	0x02		/* Changes are private */
#define MAP_TYPE	0x0f		/* Mask for type of mapping */
#define MAP_FIXED	0x10		/* Interpret addr exactly */
#define MAP_ANONYMOUS	0x20		/* don't use a file */
#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
# define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be uninitialized */
#else
# define MAP_UNINITIALIZED 0x0		/* Don't support this flag */
#endif
MAP_SHARED
用于多个进程共享对一个文件的访问
MAP_PRIVATE
用于创建一个与数据源分离的私有映射,对区域的写入操作不影响数据源文件中的内容
MAP_FIXED
用于在指定的目标线性地址创建一个映射,不允许调整到其他地址
MAP_ANONYMOUS
用于创建与文件无关的映射,或者说没有数据源的映射
do_anonymous_page会调用alloc_zeroed_user_highpage_movable分配一个初始化为全0的内存页。
2
在vm_area_struct数据结构定义中,有一个双链表结点anon_vma_chain:
struct vm_area_struct{
......
/*
 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
 * list, after a COW of one of the file pages. A MAP_SHARED vma
 * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
 * or brk vma (with NULL file) can only be in an anon_vma list.
 */
struct list_head anon_vma_chain;/* Serialized by mmap_sem &
                                 * page_table_lock */
struct anon_vma *anon_vma;/* Serialized by page_table_lock */
......
}
其中,struct anon_vma定义:
/*
 * The anon_vma heads a list of private "related" vmas, to scan if
 * an anonymous page pointing to this anon_vma needs to be unmapped:
 * the vmas on the list will be related by forking, or by splitting.
 *
 * Since vmas come and go as they are split and merged (particularly
 * in mprotect), the mapping field of an anonymous page cannot point
 * directly to a vma: instead it points to an anon_vma, on whose list
 * the related vmas can be easily linked or unlinked.
 *
 * After unlinking the last vma on the list, we must garbage collect
 * the anon_vma object itself: we're guaranteed no page can be
 * pointing to this anon_vma once its vma list is empty.
 */
/* NOTE(review): quoted kernel definition; one anon_vma is shared by all
 * vmas related by fork/split, per the comment above. */
struct anon_vma{
struct anon_vma *root;/* Root of this anon_vma tree */
struct mutex mutex;/* Serialize access to vma list */
/*
 * The refcount is taken on an anon_vma when there is no
 * guarantee that the vma of page tables will exist for
 * the duration of the operation. A caller that takes
 * the reference is responsible for clearing up the
 * anon_vma if they are the last user on release
 */
atomic_t refcount;
/*
 * NOTE: the LSB of the head.next is set by
 * mm_take_all_locks() _after_ taking the above lock. So the
 * head must only be read/written after taking the above lock
 * to be sure to see a valid next pointer. The LSB bit itself
 * is serialized by a system wide lock only visible to
 * mm_take_all_locks() (mm_all_locks_mutex).
 */
struct list_head head;/* Chain of private "related" vmas */
};
3
/* do_mmap
 *
 * Thin wrapper around do_mmap_pgoff(): rejects requests whose
 * page-aligned length would overflow past offset, requires offset to be
 * page-aligned, and converts the byte offset to a page offset.
 * Returns the mapped address on success or -EINVAL on a bad request.
 */
static inline unsigned long do_mmap(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long offset)
{
	/* Aligning len to a page must not wrap the address space. */
	if ((offset + PAGE_ALIGN(len)) < offset)
		return -EINVAL;
	/* offset itself must sit on a page boundary. */
	if (offset & ~PAGE_MASK)
		return -EINVAL;
	return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
}
if ((offset + PAGE_ALIGN(len)) < offset)
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)	ALIGN(addr, PAGE_SIZE)
/*
 * 'kernel.h' contains some often-used function prototypes etc
 *
 * NOTE(review): the original excerpt dropped the ALIGN() definition and
 * truncated __ALIGN_KERNEL_MASK's replacement text; both restored below.
 * __typeof__ is used instead of the GNU keyword typeof so the macros also
 * work under strict -std= modes.
 */
#define ALIGN(x, a)		__ALIGN_KERNEL((x), (a))
#define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1)
/* Round x up to the next multiple of (mask + 1); mask must be 2^n - 1. */
#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))
即
if ((offset + (((len) + (PAGE_SIZE - 1)) & ~(PAGE_SIZE-1))) < offset)
表示如果len太长,再进行align to page boundary操作就会溢出了,那么没有那么多的线性地址空间可以给它映射,因此失败。
if (!(offset & ~PAGE_MASK))
如果offset是位于页的边界处,则继续操作
ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
其中最后一个参数代表了映射区域在文件中的页序号。
/*
 * do_mmap_pgoff - validate an mmap request and create the mapping.
 *
 * @file:  backing file, or NULL for an anonymous mapping
 * @addr:  hint for the start address (only binding with MAP_FIXED)
 * @pgoff: offset into the file, in pages
 *
 * Returns the start address of the new mapping, or a negative errno.
 *
 * The caller must hold down_write(&current->mm->mmap_sem).
 */
unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
unsigned long flags, unsigned long pgoff)
{
struct mm_struct * mm = current->mm;
struct inode *inode;
vm_flags_t vm_flags;
int error;
/* Keep the caller's original prot for the security hook below. */
unsigned long reqprot = prot;
/*
 * Does the application expect PROT_READ to imply PROT_EXEC?
 *
 * (the exception is when the underlying filesystem is noexec
 * mounted, in which case we don't add PROT_EXEC.)
 */
if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
prot |= PROT_EXEC;
if (!len)
return -EINVAL;
if (!(flags & MAP_FIXED))
addr = round_hint_to_min(addr);
/* Careful about overflows.. */
len = PAGE_ALIGN(len);
/* len == 0 here means PAGE_ALIGN wrapped around: request too large. */
if (!len)
return -ENOMEM;
/* offset overflow? */
if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
return -EOVERFLOW;
/* Too many mappings? */
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
/* Obtain the address to map to. we verify (or select) it and ensure
 * that it represents a valid section of the address space.
 */
addr = get_unmapped_area(file, addr, len, pgoff, flags);
/* A non-page-aligned value is really a negative errno in disguise. */
if (addr & ~PAGE_MASK)
return addr;
/* Do simple checking here so the lower-level routines won't have
 * to. we assume access permissions have been handled by the open
 * of the memory object, so we don't do any here.
 */
vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_LOCKED)
if (!can_do_mlock())
return -EPERM;
/* mlock MCL_FUTURE? */
if (vm_flags & VM_LOCKED){
unsigned long locked, lock_limit;
locked = len >> PAGE_SHIFT;
locked += mm->locked_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
}
inode = file ? file->f_path.dentry->d_inode : NULL;
/* File-backed and anonymous mappings validate MAP_TYPE differently. */
if (file){
switch (flags & MAP_TYPE){
case MAP_SHARED:
if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
return -EACCES;
/*
 * Make sure we don't allow writing to an append-only
 * file..
 */
if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
return -EACCES;
/*
 * Make sure there are no mandatory locks on the file.
 */
if (locks_verify_locked(inode))
return -EAGAIN;
vm_flags |= VM_SHARED | VM_MAYSHARE;
if (!(file->f_mode & FMODE_WRITE))
vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
/* fall through */
case MAP_PRIVATE:
if (!(file->f_mode & FMODE_READ))
return -EACCES;
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC){
if (vm_flags & VM_EXEC)
return -EPERM;
vm_flags &= ~VM_MAYEXEC;
}
if (!file->f_op || !file->f_op->mmap)
return -ENODEV;
break;
default:
return -EINVAL;
}
} else{
/* Anonymous mapping: no file, no inode checks. */
switch (flags & MAP_TYPE){
case MAP_SHARED:
/*
 * Ignore pgoff.
 */
pgoff = 0;
vm_flags |= VM_SHARED | VM_MAYSHARE;
break;
case MAP_PRIVATE:
/*
 * Set pgoff according to addr for anon_vma.
 */
pgoff = addr >> PAGE_SHIFT;
break;
default:
return -EINVAL;
}
}
error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
if (error)
return error;
/* All checks passed: actually build the vma. */
return mmap_region(file, addr, len, flags, vm_flags, pgoff);
}
EXPORT_SYMBOL(do_mmap_pgoff);
/*
 * NOTE(review): excerpt repeated from do_mmap_pgoff above, quoted again
 * for the discussion that follows.
 *
 * Obtain the address to map to. we verify (or select) it and ensure
 * that it represents a valid section of the address space.
 */
addr = get_unmapped_area(file, addr, len, pgoff, flags);
if (addr & ~PAGE_MASK)
return addr;
get_unmapped_area函数用于查找到一个可以安放请求的这么长的一个vma的线性地址范围,返回这个范围的起始地址。如果这个起始地址不是从页对齐处开始的,代表找到的这个地址是不符合要求的,因此也不再往下走了,直接返回。
但是问题是,如果直接返回了,那么调用者会不会不做检查,直接认为内核已经完成了mmap操作,从而尝试去读写这块还没有与文件建立起关联的内存区域,进而发生不可预知的事情?
【根据/article/6526483.html中的思想,当进程真正需要访问页时,会触发Page Fault,那么这一步关键是设置好相应的Page Fault handler以及相应struct的指针成员】
相关文章推荐
- linux用户空间内存管理:内存映射和需求分页(缺页中断)
- Linux内核空间到用户空间的共享内存映射
- linux内存布局的内核实现--用户空间的映射方式
- [Linux内存管理] linux内存布局的内核实现--用户空间的映射方式
- linux内存布局的内核实现--用户空间的映射方式
- linux 内存映射 remap_pfn_range操作(内核地址映射到用户空间)
- Linux用户空间与内核空间内存映射
- Linux用户空间与内核空间内存映射
- Linux内存点滴:用户进程内存空间
- linux内存映射/内存区域/地址空间的概念
- linux 用户空间与内核空间——高端内存详解
- linux 用户空间与内核空间——高端内存详解
- [arm驱动]linux设备地址映射到用户空间
- Linux内存点滴 用户进程内存空间
- Linux 用户进程内存空间详解
- 从 Linux 内核访问用户空间内存
- Linux用户空间与内核空间(理解高端内存)【转】
- Linux用户进程内存空间
- linux 用户空间与内核空间 (高端内存详解)
- [arm驱动]linux设备地址映射到用户空间 推荐