您的位置:首页 > 运维架构

open系统调用在内核中的流程浅析

2013-06-16 12:50 246 查看

转载地址:

http://linux.chinaunix.net/techdoc/develop/2008/12/18/1053790.shtml

正文:

---------------------------------------------------------------------------------------------------------------------------------------------------

以字符设备为例,相对于块设备要简单些。

基于2.6.26的内核,《Understanding the Linux Kernel》(第3版)里面其实都讲到了,所以想再深入的话,可以去参考那本书。

一)驱动注册open函数都干了些什么?
register_chrdev -> cdev_add  ->  kobj_map 
/*
 * Excerpt of register_chrdev(); the tail of the function is elided ("...").
 *
 * Calls __register_chrdev_region(major, 0, 256, name) and returns its error
 * code if that fails, then allocates a cdev with cdev_alloc() and copies the
 * driver's owner and file_operations pointer into it.
 */
int register_chrdev(unsigned int major, const char *name,
const struct file_operations *fops)
{
struct char_device_struct *cd;
struct cdev *cdev;
char *s;
int err = -ENOMEM;
cd = __register_chrdev_region(major, 0, 256, name);
if (IS_ERR(cd))
return PTR_ERR(cd);
cdev = cdev_alloc();
if (!cdev)
goto out2;
cdev->owner = fops->owner;
cdev->ops = fops;        // note: chrdev_open() later reads fops back out of the cdev
...
}
/*
 * Store the device number and count in the cdev, then register it in
 * cdev_map through kobj_map(), with exact_match as the probe callback,
 * exact_lock as the lock callback and the cdev itself as the callback data.
 * Returns whatever kobj_map() returns.
 */
int cdev_add(struct cdev *p, dev_t dev, unsigned count)
{
p->dev = dev;
p->count = count;
return kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p);
}
file: fs/char_dev.c
/*
 * Probe callback that cdev_add() hands to kobj_map(): `data` is the
 * struct cdev that was registered; return the kobject embedded in it.
 * `dev` and `part` are not used.
 */
static struct kobject *exact_match(dev_t dev, int *part, void *data)
{
struct cdev *p = data;
return &p->kobj;
}
file: drivers/base/map.c
/*
 * kobj_map - register the device-number range [dev, dev + range - 1] in a map.
 *
 * @domain: mapping domain to add to (cdev_map for character devices)
 * @dev:    first device number of the range
 * @range:  number of consecutive device numbers covered
 * @module: module stored as the owner of each probe entry
 * @probe:  callback stored as ->get; kobj_lookup() calls it to obtain the kobject
 * @lock:   callback stored as ->lock
 * @data:   opaque pointer handed back to @probe and @lock
 *
 * One struct probe is allocated for every major number the range touches
 * (capped at 255). Each is linked into domain->probes[major % 255].
 *
 * Returns 0 on success or -ENOMEM if the probe array cannot be allocated.
 */
int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range,
	     struct module *module, kobj_probe_t *probe,
	     int (*lock)(dev_t, void *), void *data)
{
	unsigned n = MAJOR(dev + range - 1) - MAJOR(dev) + 1;
	unsigned index = MAJOR(dev);
	unsigned i;
	struct probe *p;

	if (n > 255)
		n = 255;

	p = kmalloc(sizeof(struct probe) * n, GFP_KERNEL);
	if (p == NULL)
		return -ENOMEM;

	/* Fill in all n entries identically; p ends up one past the last. */
	for (i = 0; i < n; i++, p++) {
		p->owner = module;
		p->get = probe;            // for char devices this is exact_match
		p->lock = lock;
		p->dev = dev;
		p->range = range;
		p->data = data;
	}

	mutex_lock(domain->lock);
	/* Rewind p to the first entry and link one entry per major number. */
	for (i = 0, p -= n; i < n; i++, p++, index++) {
		struct probe **s = &domain->probes[index % 255];

		/*
		 * Skip entries with a smaller range so each chain stays sorted
		 * by ascending range.
		 */
		while (*s && (*s)->range < range)
			s = &(*s)->next;
		p->next = *s;
		*s = p;
	}
	mutex_unlock(domain->lock);
	return 0;
}
【参考 Understanding The Linux Kernel 13.5. Character Device Drivers 】

The device driver model defines a kobject mapping domain for the character devices, which is represented by a descriptor of type kobj_map and is referenced by the cdev_map global variable. The kobj_map descriptor includes a hash table of 255 entries indexed
by the major number of the intervals. The hash table stores objects of type probe, one for each registered range of major and minor numbers, whose fields are listed in Table 13-9.

When the kobj_map( ) function is invoked, the specified interval of device numbers is added to the hash table. The data field of the corresponding probe object points to the cdev descriptor of the device driver. The value of this field is passed to the get
and lock methods when they are executed. In this case, the get method is implemented by a short function that returns the address of the kobject embedded in the cdev descriptor; the lock method, instead, essentially increases the reference counter in the embedded
kobject.

The kobj_lookup( ) function receives as input parameters a kobject mapping domain and a device number; it searches the hash table and returns the address of the kobject of the owner of the interval including the number, if it was found. When applied to the
mapping domain of the character devices, the function returns the address of the kobject embedded in the cdev descriptor of the device driver that owns the interval of device numbers.

二)从系统调用往内核走,看当初驱动里注册的file_operations里的open函数怎么被调用的

sys_open -> do_sys_open -> do_filp_open -> nameidata_to_filp -> __dentry_open

问题是 :

1)__dentry_open如何找到 chrdev_open?

2)最终又是如何调用file_operations里的在驱动里面注册的open函数的呢?
/*
 * Excerpt of __dentry_open(); the tail of the function is elided ("...").
 *
 * Fills in the struct file from the flags, dentry and mount, takes f_op from
 * the inode's i_fop, and, when the caller passed no `open` callback, selects
 * f->f_op->open as the open method.
 */
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
int flags, struct file *f,
int (*open)(struct inode *, struct file *))
{
struct inode *inode;
int error;
f->f_flags = flags;
f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
FMODE_PREAD | FMODE_PWRITE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = __get_file_write_access(inode, mnt);
if (error)
goto cleanup_file;
if (!special_file(inode->i_mode))
file_take_write(f);
}
f->f_mapping = inode->i_mapping;
f->f_path.dentry = dentry;
f->f_path.mnt = mnt;
f->f_pos = 0;
f->f_op = fops_get(inode->i_fop);    // for a char device inode this is def_chr_fops
file_move(f, &inode->i_sb->s_files);
error = security_dentry_open(f);
if (error)
goto cleanup_all;
if (!open && f->f_op)
open = f->f_op->open;        // selects def_chr_fops.open, i.e. chrdev_open; the call itself is in the elided part
...
}
file: fs/char_dev.c
/*
 * Excerpt of chrdev_open(); the article abbreviates the signature (the body
 * uses `inode` and `filp`) and elides the tail of the function ("...").
 *
 * Finds the cdev for the inode, either already stored in inode->i_cdev or
 * looked up in cdev_map by inode->i_rdev, then replaces filp->f_op with the
 * cdev's ops and calls that open method if there is one.
 */
chrdev_open() {
struct cdev *p;
struct cdev *new = NULL;
int ret = 0;
spin_lock(&cdev_lock);
p = inode->i_cdev;
if (!p) {
struct kobject *kobj;
int idx;
/* cdev_lock is released around the lookup below. */
spin_unlock(&cdev_lock);
kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);  // find the kobject of the matching cdev; the reverse of kobj_map()
if (!kobj)
return -ENXIO;
new = container_of(kobj, struct cdev, kobj);    // recover the cdev that embeds this kobject
spin_lock(&cdev_lock);
/* Re-read i_cdev: the lock was released during the lookup. */
p = inode->i_cdev;
if (!p) {
inode->i_cdev = p = new;
inode->i_cindex = idx;
list_add(&inode->i_devices, &p->list);
new = NULL;
} else if (!cdev_get(p))
ret = -ENXIO;
} else if (!cdev_get(p))
ret = -ENXIO;
spin_unlock(&cdev_lock);
/* new is non-NULL only if the looked-up cdev was not installed in the inode. */
cdev_put(new);
if (ret)
return ret;
filp->f_op = fops_get(p->ops);            // gets back the file_operations pointer the driver registered
if (!filp->f_op) {
cdev_put(p);
return -ENXIO;
}
if (filp->f_op->open) {
lock_kernel();
ret = filp->f_op->open(inode,filp);    // the driver's own open() from its file_operations is finally called here
unlock_kernel();
}
...
}
file: drivers/base/map.c
/*
 * kobj_lookup - find the kobject registered for a device number.
 *
 * @domain: mapping domain to search (cdev_map for character devices)
 * @dev:    device number to look up
 * @index:  set to the offset of @dev within the matching range before the
 *          probe callback is called; the callback also receives this pointer
 *
 * Walks the chain at domain->probes[MAJOR(dev) % 255]. Entries whose range
 * does not contain @dev are skipped. For a matching entry the lock callback
 * is called (if set), the mutex is dropped, and the probe callback is called.
 * If the probe returns NULL the search restarts; `best` then stops the walk
 * at entries whose range is not smaller than the one already tried.
 *
 * Returns the kobject from the probe callback, or NULL if nothing matches.
 */
struct kobject *kobj_lookup(struct kobj_map *domain, dev_t dev, int *index)
{
	struct kobject *kobj;
	struct probe *p;
	unsigned long best = ~0UL;

retry:
	mutex_lock(domain->lock);
	for (p = domain->probes[MAJOR(dev) % 255]; p; p = p->next) {
		struct kobject *(*probe)(dev_t, int *, void *);
		struct module *owner;
		void *data;

		/* Skip ranges that do not contain dev. */
		if (p->dev > dev || p->dev + p->range - 1 < dev)
			continue;
		/* Stop once ranges are no smaller than one already tried. */
		if (p->range - 1 >= best)
			break;
		if (!try_module_get(p->owner))
			continue;
		owner = p->owner;
		data = p->data;
		probe = p->get;                // for char devices this is exact_match
		best = p->range - 1;
		*index = dev - p->dev;
		if (p->lock && p->lock(dev, data) < 0) {
			module_put(owner);
			continue;
		}
		/* The probe callback runs without the domain mutex held. */
		mutex_unlock(domain->lock);
		kobj = probe(dev, index, data);        // calls exact_match for char devices
		/* Currently ->owner protects _only_ ->probe() itself. */
		module_put(owner);
		if (kobj)
			return kobj;
		goto retry;
	}
	mutex_unlock(domain->lock);
	return NULL;
}
【参考 Understanding The Linux Kernel 13.5.2. Accessing a Character Device Driver】
We mentioned in the earlier section "VFS Handling of Device Files" that the dentry_open( ) function triggered by the open( ) system call service routine customizes the f_op field in the file object of the character device file so that it points to the def_chr_fops
table. This table is almost empty; it only defines the chrdev_open( ) function as the open method of the device file. This method is immediately invoked by dentry_open( ).

三)什么时候为字符设备设置的def_chr_fops ?

这个跟具体的文件系统有关系的。

现在/dev/下的设备节点都是通过udev动态创建的,udev会去调用mknod(假定是ext2,内核会调用ext2_mknod),而ext2_mknod会调用init_special_inode();如果是char设备,init_special_inode()会把def_chr_fops赋给inode->i_fop。函数的部分实现如下:
file: fs/ext2/namei.c
/*
 * Excerpt of ext2_mknod(); the article cuts the function off after the
 * init_special_inode() call.
 *
 * Returns -EINVAL if rdev fails new_valid_dev(), creates the inode with
 * ext2_new_inode(), and on success passes it to init_special_inode().
 */
static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
{
struct inode * inode;
int err;
if (!new_valid_dev(rdev))
return -EINVAL;
inode = ext2_new_inode (dir, mode);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);        // hand the new inode to init_special_inode()
file: fs/inode.c
/*
 * Excerpt of init_special_inode(); the handling of other file types is
 * elided ("...").
 *
 * Stores the mode in the inode and, for character devices (S_ISCHR), sets
 * def_chr_fops as the inode's file operations and records the device
 * number in i_rdev.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
inode->i_mode = mode;
if (S_ISCHR(mode)) {
inode->i_fop = &def_chr_fops;            // default file operations for char devices are set here
inode->i_rdev = rdev;
}
...
}
file: fs/char_dev.c
/*
 * File operations that init_special_inode() sets on character-device
 * inodes. As shown here, only .open is set, pointing at chrdev_open.
 */
const struct file_operations def_chr_fops = {
.open = chrdev_open,
};
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  内核 Linux