您的位置:首页 > 运维架构 > Linux

Linux内核源码分析-安装普通文件系统-mount系统调用

2016-10-15 11:27 501 查看
Linux内核源码分析-安装普通文件系统-mount系统调用
本文主要参考《深入理解Linux内核》,结合2.6.11.1版的内核代码,分析内核文件子系统中的安装普通文件系统函数。

注意:

1、不描述内核同步、错误处理、参数合法性验证相关的内容

2、源码摘自Linux内核2.6.11.1版

3、阅读本文请结合《深入理解Linux内核》第三版相关章节

4、本文会不定时更新

1、sys_mount

函数源码:

/*
 * sys_mount - entry point of the mount() system call.
 *
 * Copies every user-space argument into kernel memory and then hands the
 * kernel copies to do_mount() while holding the big kernel lock.
 *
 * @dev_name: user pointer to the device name
 * @dir_name: user pointer to the mount point path name
 * @type:     user pointer to the filesystem type string
 * @flags:    mount flags, passed through to do_mount() unchanged
 * @data:     user pointer to filesystem-specific mount data
 *
 * Returns the result of do_mount(), or the negative error code of the
 * first argument copy that failed.
 */
asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name,
                          char __user *type, unsigned long flags,
                          void __user *data)
{
    int retval;
    unsigned long data_page;
    unsigned long type_page;
    unsigned long dev_page;
    char *dir_page;

    /* Copy the filesystem type string from user space; type_page
     * receives the address of the kernel page holding the copy. */
    retval = copy_mount_options(type, &type_page);
    if (retval < 0)
        return retval;

    /* Fetch the mount point path name from user space. */
    dir_page = getname(dir_name);
    retval = PTR_ERR(dir_page);
    if (IS_ERR(dir_page))
        goto out1;

    /* Copy the block device file name from user space. */
    retval = copy_mount_options(dev_name, &dev_page);
    if (retval < 0)
        goto out2;

    /* Copy the filesystem-specific mount data from user space. */
    retval = copy_mount_options(data, &data_page);
    if (retval < 0)
        goto out3;

    lock_kernel();
    /* The real work happens in do_mount(), analysed later in this article. */
    retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
                      flags, (void *)data_page);
    unlock_kernel();
    free_page(data_page);

    /*
     * Cleanup runs in reverse order of acquisition; each label is the
     * landing point for a failure of the step that follows the resource
     * it releases.
     */
out3:
    free_page(dev_page);
out2:
    putname(dir_page);
out1:
    free_page(type_page);
    return retval;
}

函数处理流程:

从用户空间复制数据到内核空间(准备参数)、获取大内核锁、调用do_mount函数

2、copy_mount_options

函数源码:

int copy_mount_options(const void__user *data, unsigned long *where)

{

    inti;

    unsignedlong page;

    unsignedlong size;

   

    *where= 0;

    if(!data)

       return0;

 

    if(!(page = __get_free_page(GFP_KERNEL)))

       return-ENOMEM;

 

    /*We only care that *some* data at the address the user

     * gave us is valid.  Just in case, we'll zero

     * the remainder of the page.

     */

    /*copy_from_user cannot cross TASK_SIZE ! */

    size= TASK_SIZE - (unsigned long)data;

    if(size > PAGE_SIZE)

       size= PAGE_SIZE;

 

    i= size - exact_copy_from_user((void *)page, data, size);

    if(!i) {

       free_page(page);

       return-EFAULT;

    }

    if(i != PAGE_SIZE)

       memset((char*)page + i, 0, PAGE_SIZE - i);

    *where= page;

    return0;

}

函数处理流程:

1、调用函数__get_free_page分配一个空闲页框并返回页框的线性地址

2、调用函数exact_copy_from_user把用户空间的数据从data复制到该页框中

3、把页剩余的空间置0,并用*where返回页框的起始线性地址

3、exact_copy_from_user

函数源码:

/*

 * Some copy_from_user() implementations do notreturn the exact number of

 * bytes remaining to copy on a fault.  But copy_mount_options() requires that.

 * Note that this function differs fromcopy_from_user() in that it will oops

 * on bad values of `to', rather than returninga short copy.

 */

static long

exact_copy_from_user(void *to, constvoid __user *from, unsigned long n)

{

    char*t = to;

    constchar __user *f = from;

    charc;

 

    if(!access_ok(VERIFY_READ, from, n))

       returnn;

 

    while(n) {

       if(__get_user(c, f)) {

           memset(t,0, n);

           break;

       }

       *t++= c;

       f++;

       n--;

    }

    returnn;

}

函数处理流程:

调用函数__get_user一次从用户空间复制一个字符,返回未完成复制的字节数,分析参见后续文章。

4、do_mount

函数源码:

/*

 * Flags is a 32-bit value that allows up to 31non-fs dependent flags to

 * be given to the mount() call (ie: read-only,no-dev, no-suid etc).

 *

 * data is a (void *) that can point to anystructure up to

 * PAGE_SIZE-1 bytes, which can containarbitrary fs-dependent

 * information (or be NULL).

 *

 * Pre-0.97 versions of mount() didn't have aflags word.

 * When the flags word was introduced its tophalf was required

 * to have the magic value 0xC0ED, and thisremained so until 2.4.0-test9.

 * Therefore, if this magic number is present,it carries no information

 * and must be discarded.

 */

long do_mount(char * dev_name, char *dir_name, char *type_page,

         unsigned long flags, void *data_page)

{

    structnameidata nd;

    intretval = 0;

    intmnt_flags = 0;

 

    /*Discard magic */

    if((flags & MS_MGC_MSK) == MS_MGC_VAL)

       flags&= ~MS_MGC_MSK;

 

    /*Basic sanity checks */

    if(!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))

       return-EINVAL;

    if(dev_name && !memchr(dev_name, 0, PAGE_SIZE))

       return-EINVAL;

 

    if(data_page)

       ((char*)data_page)[PAGE_SIZE - 1] = 0;

 

    /*Separate the per-mountpoint flags */

    if(flags & MS_NOSUID)

       mnt_flags|= MNT_NOSUID;

    if(flags & MS_NODEV)

       mnt_flags|= MNT_NODEV;

    if(flags & MS_NOEXEC)

       mnt_flags|= MNT_NOEXEC;

    flags&= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE);

 

    /*... and get the mountpoint */

    retval= path_lookup(dir_name, LOOKUP_FOLLOW, &nd);

    if(retval)

       returnretval;

 

    retval= security_sb_mount(dev_name, &nd, type_page, flags, data_page);

    if(retval)

       gotodput_out;

 

    if(flags & MS_REMOUNT)

       retval= do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,

                  data_page);

    elseif (flags & MS_BIND)

       retval= do_loopback(&nd, dev_name, flags & MS_REC);

    elseif (flags & MS_MOVE)

       retval= do_move_mount(&nd, dev_name);

    else

       retval= do_new_mount(&nd, type_page, flags, mnt_flags,

                    dev_name, data_page);

dput_out:

    path_release(&nd);

    returnretval;

}

函数处理流程:

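1、把mount安装标志flags中与安装点相关的标志(MS_NOSUID、MS_NODEV、MS_NOEXEC)转换为已安装文件系统描述符使用的标志(MNT_NOSUID、MNT_NODEV、MNT_NOEXEC),保存在局部变量mnt_flags中,并把这些标志连同MS_ACTIVE从flags中清除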

2、调用path_lookup函数对安装点路径名进行查找,查找结果存放在nameidata类型的nd局部变量中

3、根据安装标识是重新安装、绑定安装、移动安装、新安装分别调用不同的函数进行处理,本文仅描述新安装,即调用do_new_mount

5、do_new_mount

函数源码:

/*

 * create a new mount for userspace and requestit to be added into the

 * namespace's tree

 */

static int do_new_mount(structnameidata *nd, char *type, int flags,

           intmnt_flags, char *name, void *data)

{

    structvfsmount *mnt;

 

    if(!type || !memchr(type, 0, PAGE_SIZE))

       return-EINVAL;

 

    /*we need capabilities... */

    if(!capable(CAP_SYS_ADMIN))

       return-EPERM;

 

    mnt= do_kern_mount(type, flags, name, data);

    if(IS_ERR(mnt))

       returnPTR_ERR(mnt);

 

    returndo_add_mount(mnt, nd, mnt_flags, NULL);

}

函数处理流程:

1、    调用函数do_kern_mount进行实际安装操作,返回新安装文件系统描述符的地址

2、    调用函数do_add_mount把新安装文件系统描述符插入到相关数据结构中

6、do_kern_mount

函数源码:

struct vfsmount *

do_kern_mount(const char *fstype, intflags, const char *name, void *data)

{

    structfile_system_type *type = get_fs_type(fstype);

    structsuper_block *sb = ERR_PTR(-ENOMEM);

    structvfsmount *mnt;

    interror;

    char*secdata = NULL;

 

    if(!type)

       returnERR_PTR(-ENODEV);

 

    mnt= alloc_vfsmnt(name);

    if(!mnt)

       gotoout;

 

    if(data) {

       secdata= alloc_secdata();

       if(!secdata) {

           sb= ERR_PTR(-ENOMEM);

           gotoout_mnt;

       }

 

       error= security_sb_copy_data(type, data, secdata);

       if(error) {

           sb= ERR_PTR(error);

           gotoout_free_secdata;

       }

    }

 

    sb= type->get_sb(type, flags, name, data);

    if(IS_ERR(sb))

       gotoout_free_secdata;

    error = security_sb_kern_mount(sb, secdata);

    if (error)

       goto out_sb;

    mnt->mnt_sb= sb; //超级块

    mnt->mnt_root= dget(sb->s_root); //根文件系统

    mnt->mnt_mountpoint= sb->s_root; //挂载点

    mnt->mnt_parent= mnt; //父文件系统

    mnt->mnt_namespace= current->namespace; //命名空间

    up_write(&sb->s_umount);

    put_filesystem(type);

    returnmnt;

out_sb:

    up_write(&sb->s_umount);

    deactivate_super(sb);

    sb= ERR_PTR(error);

out_free_secdata:

    free_secdata(secdata);

out_mnt:

    free_vfsmnt(mnt);

out:

    put_filesystem(type);

    return(struct vfsmount *)sb;

}

函数处理流程:

1、根据文件系统类型名称,调用函数get_fs_type获得类型为file_system_type的文件系统类型对象的地址,存入局部变量type中

2、调用alloc_vfsmnt从mnt_cache slab高速缓存中分配一个新的已安装文件系统描述符(vfsmount对象)

3、调用依赖于文件系统的type->get_sb函数分配并初始化一个超级块,具体分析参见后续文章“Linux内核源码分析-ext2分配初始化超级块-ext2_get_sb”

4、初始化mnt相关字段,具体参见注释

7、do_add_mount

函数源码:

/*

 * add a mount into a namespace's mount tree

 * - provide the option of adding the new mountto an expiration list

 */

int do_add_mount(struct vfsmount*newmnt, struct nameidata *nd,

        int mnt_flags, struct list_head *fslist)

{

    interr;

 

    down_write(¤t->namespace->sem);

    /*Something was mounted here while we slept */

    while(d_mountpoint(nd->dentry)&& follow_down(&nd->mnt, &nd->dentry))

       ;

    err= -EINVAL;

    if(!check_mnt(nd->mnt))

       gotounlock;

 

    /*Refuse the same filesystem on the same mount point */

    err= -EBUSY;

    if(nd->mnt->mnt_sb == newmnt->mnt_sb &&

        nd->mnt->mnt_root == nd->dentry)

       gotounlock;

 

    err= -EINVAL;

    if(S_ISLNK(newmnt->mnt_root->d_inode->i_mode))

       gotounlock;

 

    newmnt->mnt_flags= mnt_flags;

    err= graft_tree(newmnt, nd);

 

    if(err == 0 && fslist) {

       /*add to the specified expiration list */

       spin_lock(&vfsmount_lock);

       list_add_tail(&newmnt->mnt_fslink,fslist);

       spin_unlock(&vfsmount_lock);

    }

 

unlock:

    up_write(¤t->namespace->sem);

    mntput(newmnt);

    returnerr;

}

函数处理流程:

1、当安装点路径目录项的安装文件系统数不为0时,调用follow_down更新安装的目录项对象和安装点对应的文件系统对象,具体分析见后续文章(路径名查找)

2、验证安装点的命名空间是否还是当前命名空间,如果不是,返回错误

3、如果要安装的文件系统已经安装在该安装点上(返回-EBUSY),或者新文件系统的根目录项是一个符号链接(返回-EINVAL),返回错误

4、初始化do_kern_mount分配的vfsmount 对象newmnt的mnt_flags

5、调用函数graft_tree把新分配的vfsmount对象插入到namespace链表、散列表、父文件系统的子链表中

8、alloc_vfsmnt

函数功能:

从mnt_cache slab高速缓存分配一个vfsmount对象,并初始化相关字段,具体信息见注释

函数源码:

struct vfsmount *alloc_vfsmnt(constchar *name)

{

    structvfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);

    if(mnt) {

       memset(mnt,0, sizeof(struct vfsmount));

       atomic_set(&mnt->mnt_count,1);//引用计数器

       INIT_LIST_HEAD(&mnt->mnt_hash);//mount_hashtable哈希表链接指针

       INIT_LIST_HEAD(&mnt->mnt_child);//子文件系统链接指针

       INIT_LIST_HEAD(&mnt->mnt_mounts);//子文件系统链表头

       INIT_LIST_HEAD(&mnt->mnt_list);  //namespace链接指针

       INIT_LIST_HEAD(&mnt->mnt_fslink);//到期文件系统链接指针

       if(name) {

           intsize = strlen(name)+1;

           char*newname = kmalloc(size, GFP_KERNEL);

           if(newname) {

              memcpy(newname,name, size);

              mnt->mnt_devname= newname; //文件系统设备文件名

           }

       }

    }

    returnmnt;

}

9、graft_tree

函数源码(分析参见注释):

static int graft_tree(struct vfsmount*mnt, struct nameidata *nd)

{

    interr;

    if(mnt->mnt_sb->s_flags & MS_NOUSER)

       return-EINVAL;

 

    if(S_ISDIR(nd->dentry->d_inode->i_mode) !=

         S_ISDIR(mnt->mnt_root->d_inode->i_mode))

       return-ENOTDIR;

 

    err= -ENOENT;

    down(&nd->dentry->d_inode->i_sem);

    if(IS_DEADDIR(nd->dentry->d_inode))

       gotoout_unlock;

 

    err= security_sb_check_sb(mnt, nd);

    if(err)

       gotoout_unlock;

 

    err= -ENOENT;

    spin_lock(&vfsmount_lock);

    if(IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {

       structlist_head head;

       attach_mnt(mnt,nd); //见下面代码注释

       list_add_tail(&head,&mnt->mnt_list);

       list_splice(&head,current->namespace->list.prev);

       mntget(mnt);//把文件对象插入命名空间链表并增加引用计数器

       err= 0;

    }

    spin_unlock(&vfsmount_lock);

out_unlock:

    up(&nd->dentry->d_inode->i_sem);

    if(!err)

       security_sb_post_addmount(mnt,nd);

    returnerr;

}

/*
 * attach_mnt - link @mnt under the mount and dentry recorded in @nd.
 *
 * Sets the parent and mount point of @mnt, then inserts it into
 * mount_hashtable and into the parent's list of child mounts.
 */
static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
{
    mnt->mnt_parent = mntget(nd->mnt);          /* parent mount */
    mnt->mnt_mountpoint = dget(nd->dentry);     /* mount point dentry */
    /* hash table mount_hashtable, bucket chosen from (parent mount, mount point dentry) */
    list_add(&mnt->mnt_hash, mount_hashtable + hash(nd->mnt, nd->dentry));
    /* the parent's list of child mounts */
    list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
    nd->dentry->d_mounted++;                    /* number of mounts on this mount point */
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息