Linux内核---40.模块加载过程分析
2016-07-09 10:20
513 查看
一. 自己写的一个insmod小程序
1. 下面是一个类似于insmod的程序
绝大多数代码是从busybox中的insmod.c中弄过来的,不过效果还是有的.
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#define INT_MAX ((int)(~0U>>1))
/*
 * Map a module file read-only into memory.
 *
 * filename:     path of the file to map.
 * image_size_p: in:  largest file size the caller accepts;
 *               out: size of the mapping (written only on success).
 *
 * Returns the mapped image, or NULL if the file cannot be opened or
 * stat'ed, is larger than *image_size_p, or cannot be mapped.  The caller
 * releases the image with munmap(image, *image_size_p).
 */
void* try_to_mmap_module(const char *filename, size_t *image_size_p)
{
	void *image = NULL;
	struct stat st;
	int fd;

	fd = open(filename, O_RDONLY);
	if (fd < 0)
		return NULL;

	/* Without a successful fstat() the size read below would be garbage. */
	if (fstat(fd, &st) != 0)
		goto out;

	/*
	 * st.st_size is a signed off_t and may be wider than size_t, so
	 * range-check it before narrowing it for mmap().
	 */
	if (st.st_size >= 0
	    && (unsigned long long)st.st_size <= (unsigned long long)*image_size_p) {
		size_t image_size = (size_t)st.st_size;

		image = mmap(NULL, image_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (image == MAP_FAILED)
			image = NULL;
		else
			*image_size_p = image_size;	/* Success. Report the size */
	}
out:
	/* The descriptor is no longer needed once the mapping attempt is done. */
	close(fd);
	return image;
}
/*
 * Minimal insmod: map the module file named by argv[1] and hand the image
 * to the kernel through the init_module system call.  No module
 * parameters are passed (the option string is empty).
 *
 * Returns 0 on success and -1 on any failure.
 */
int main(int argc, char **argv)
{
	const char *filename;
	size_t image_size;
	char *image;
	long rc;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <module.ko>\n",
			argc > 0 ? argv[0] : "inm");
		return -1;
	}
	filename = argv[1];

	/* Largest file size we are willing to map. */
	image_size = INT_MAX - 4095;
	image = try_to_mmap_module(filename, &image_size);
	if (image == NULL) {
		printf("mmap error\n");
		return -1;
	}

	/*
	 * Load the relocatable module image into kernel space and run the
	 * module's init function.  The call goes through syscall(2) instead
	 * of relying on an undeclared init_module() symbol.
	 */
	printf("next init_module\n");
	rc = syscall(__NR_init_module, image, image_size, "");
	if (rc != 0) {
		/* Save errno first: the calls below may overwrite it. */
		int saved_errno = errno;

		printf("init_module failed %d:%s\n", saved_errno,
		       strerror(saved_errno));
		munmap(image, image_size);
		return -1;
	}
	printf("init module sucess\n");
	munmap(image, image_size);
	return 0;
}
Makefile
CC=arm-none-linux-gnueabi-gcc
# Build the inm loader from inm.c with debug info, using the compiler in CC.
inm: inm.c
	$(CC) -g -o $@ $<
2. 实验结果如下:
root@OK6410:/work/hello# ../inm ./hello.ko
init module sucess
root@OK6410:/work/hello# lsmod
hello 2045 0 - Live 0xbf004000
二. linux内核的模块装载过程
为何只调用一个init_module就会把模块装载到内核呢?下面就分析一下它的过程
在kernel/module.c中
/*
 * init_module system call: load the module image at umod (len bytes) with
 * the argument string uargs, run its init function and mark it live.
 *
 * Returns 0 on success, -EPERM if the caller lacks CAP_SYS_MODULE or
 * module loading is disabled, the error carried by load_module()'s result,
 * or the negative value returned by the module's init function.
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
		unsigned long, len, const char __user *, uargs)
{
	struct module *mod;
	int ret = 0;

	/* Permission check. */
	if (!capable(CAP_SYS_MODULE) || modules_disabled)
		return -EPERM;

	/* Most of the work happens in load_module(). */
	mod = load_module(umod, len, uargs);
	/* Failure comes back as an ERR_PTR(); it must not be dereferenced. */
	if (IS_ERR(mod))
		return PTR_ERR(mod);

	/* Tell listeners that a new module is coming. */
	blocking_notifier_call_chain(&module_notify_list,
				     MODULE_STATE_COMING, mod);

	set_section_ro_nx(mod->module_core, mod->core_text_size,
			  mod->core_ro_size, mod->core_size);
	set_section_ro_nx(mod->module_init, mod->init_text_size,
			  mod->init_ro_size, mod->init_size);

	do_mod_ctors(mod);
	/* Run the module's init function, if it has one. */
	if (mod->init != NULL)
		ret = do_one_initcall(mod->init);
	if (ret < 0) {
		/* Init failed: announce that the module is going and free it. */
		mod->state = MODULE_STATE_GOING;
		synchronize_sched();
		module_put(mod);
		blocking_notifier_call_chain(&module_notify_list,
					     MODULE_STATE_GOING, mod);
		free_module(mod);
		wake_up(&module_wq);
		return ret;
	}
	/* A positive return value is not a failure; just dump the stack. */
	if (ret > 0)
		dump_stack();

	/* The module is now live; wake anyone waiting on module_wq. */
	mod->state = MODULE_STATE_LIVE;
	wake_up(&module_wq);
	blocking_notifier_call_chain(&module_notify_list,
				     MODULE_STATE_LIVE, mod);
	async_synchronize_full();

	mutex_lock(&module_mutex);
	module_put(mod);
	trim_init_extable(mod);
#ifdef CONFIG_KALLSYMS
	/* Switch the symbol/string tables over to the core copies. */
	mod->num_symtab = mod->core_num_syms;
	mod->symtab = mod->core_symtab;
	mod->strtab = mod->core_strtab;
#endif
	/* The init region is no longer needed: free it and clear its bookkeeping. */
	unset_module_init_ro_nx(mod);
	module_free(mod, mod->module_init);
	mod->module_init = NULL;
	mod->init_size = 0;
	mod->init_ro_size = 0;
	mod->init_text_size = 0;
	mutex_unlock(&module_mutex);

	return 0;
}
注: SYSCALL_DEFINE3(init_module, void __user *, umod, unsigned long, len, const char __user
*, uargs)
其中SYSCALL_DEFINE3是定义在include/linux/syscalls.h中
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
#define SYSCALL_DEFINEx(x, sname, ...) __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
#define __SYSCALL_DEFINEx(x, name, ...) asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))
asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))
还有一个宏 __SC_DECL*也是定义在include/linux/syscalls.h中,
它的作用是: 把参数列表中成对出现的(类型, 参数名)之间的逗号去掉,拼成"类型 参数名"的形式; 这几个宏是逐级嵌套展开的(__SC_DECL3 --> __SC_DECL2 --> __SC_DECL1).
#define __SC_DECL1(t1, a1) t1 a1
#define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
#define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__)
所以最终展开如下:
asmlinkage long sys_init_module(void __user* umod, unsigned long len, const char __user* uargs)
为什么是SYSCALL_DEFINE3? 后面的3是代表有3个参数.
2. 在load_module中完成大部分操作
init_module
--> load_module
/*
 * Allocate and load the module: copy the image in from user space, lay it
 * out in its final memory, resolve symbols and relocations, then link the
 * module into the module list and sysfs.
 *
 * Returns the loaded module, or an ERR_PTR() on failure.  On failure the
 * steps completed so far are undone in reverse order via the labels at
 * the end of the function.
 */
static struct module *load_module(void __user *umod, unsigned long len,
				  const char __user *uargs)
{
	struct load_info info = { NULL, };
	struct module *mod;
	long err;

	/* 1. Allocate a buffer the size of hello.ko, copy it in and validate it. */
	err = copy_and_check(&info, umod, len, uargs);
	if (err)
		return ERR_PTR(err);

	/* 2. Parse hello.ko and load the SHF_ALLOC data into new memory. */
	mod = layout_and_allocate(&info);
	if (IS_ERR(mod)) {
		err = PTR_ERR(mod);
		goto free_copy;
	}

	/* 3. */
	err = module_unload_init(mod);
	if (err)
		goto free_module;

	/* 4. */
	find_module_sections(mod, &info);

	/* 5. */
	err = check_module_license_and_versions(mod);
	if (err)
		goto free_unload;

	/* 6. */
	setup_modinfo(mod, &info);

	/* 7. */
	err = simplify_symbols(mod, &info);
	if (err < 0)
		goto free_modinfo;

	/* 8. */
	err = apply_relocations(mod, &info);
	if (err < 0)
		goto free_modinfo;

	/* 9. */
	err = post_relocation(mod, &info);
	if (err < 0)
		goto free_modinfo;

	/* 10. */
	flush_module_icache(mod);

	/* Copy the argument string in from user space. */
	mod->args = strndup_user(uargs, ~0UL >> 1);
	if (IS_ERR(mod->args)) {
		err = PTR_ERR(mod->args);
		goto free_arch_cleanup;
	}

	mod->state = MODULE_STATE_COMING;

	/* The module list is modified under module_mutex. */
	mutex_lock(&module_mutex);
	if (find_module(mod->name)) {
		/* A module with this name is already loaded. */
		err = -EEXIST;
		goto unlock;
	}

	if (!mod->taints || mod->taints == (1U<<TAINT_CRAP))
		dynamic_debug_setup(info.debug, info.num_debug);

	err = verify_export_symbols(mod);
	if (err < 0)
		goto ddebug;

	module_bug_finalize(info.hdr, info.sechdrs, mod);
	list_add_rcu(&mod->list, &modules);
	mutex_unlock(&module_mutex);

	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
	if (err < 0)
		goto unlink;

	err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
	if (err < 0)
		goto unlink;

	/* Drop the temporary copy of the image and the string bitmap. */
	kfree(info.strmap);
	free_copy(&info);

	trace_module_load(mod);
	return mod;

 unlink:
	mutex_lock(&module_mutex);
	list_del_rcu(&mod->list);
	module_bug_cleanup(mod);
 ddebug:
	if (!mod->taints || mod->taints == (1U<<TAINT_CRAP))
		dynamic_debug_remove(info.debug);
 unlock:
	mutex_unlock(&module_mutex);
	synchronize_sched();
	kfree(mod->args);
 free_arch_cleanup:
	module_arch_cleanup(mod);
 free_modinfo:
	free_modinfo(mod);
 free_unload:
	module_unload_free(mod);
 free_module:
	module_deallocate(mod, &info);
 free_copy:
	free_copy(&info);
	return ERR_PTR(err);
}
下面以hello.ko的加载为例分析一下:
0000000: 7f45 4c46 0101 0100 0000 0000 0000 0000 //e_ident
0000010: 0100 2800 0100 0000 0000 0000 0000 0000
type mach version entry phoff
0000020: 2cb7 0000 0000 0005 3400 0000 0000 2800
shoff flags ehsize phentsize phnum shentsize
0000030: 2400 2100
shnum shstrndx
e_shoff = 0xb72c = 46892 --> section header table在文件中的偏移是46892
e_shentsize = 0x28 = 40 --> section header table 每个entry是40Byte
e_shnum = 0x24 = 36 --> section header table中共有36个entry
e_shstrndx = 0x21 = 33 --> 说明.shstrtab这个section在section_header_table中的第33项
2.1 第1步申请内存,并把ko数据从用户区copy到内核区
root@OK6410:/work/hello# ls -l hello.ko
-rw-rw-r-- 1 1000 1000 62173 Aug 6 13:58 hello.ko
init_module
--> load_module
--> copy_and_check
/*
 * Copy the module image from user space into a kernel buffer and run
 * basic ELF sanity checks on it.
 *
 * On success stores the buffer and its length in info->hdr / info->len
 * and returns 0.  Returns -ENOEXEC for a truncated or unsuitable ELF
 * file, -ENOMEM if the buffer cannot be allocated, -EFAULT if the copy
 * from user space fails.  The buffer is freed on every error path.
 */
static int copy_and_check(struct load_info *info,
			  const void __user *umod, unsigned long len,
			  const char __user *uargs)
{
	int err;
	Elf_Ehdr *hdr;

	if (len < sizeof(*hdr))
		return -ENOEXEC;

	/* Allocate a buffer as large as the file (len is 62173 for hello.ko). */
	if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
		return -ENOMEM;

	/* Copy the whole of hello.ko from user space into kernel space. */
	if (copy_from_user(hdr, umod, len) != 0) {
		err = -EFAULT;
		goto free_hdr;
	}

	/*
	 * - e_ident must start with the four magic bytes "\177ELF";
	 * - e_type must be ET_REL (1): a relocatable file;
	 * - e_machine must match this architecture (0x28 means ARM);
	 * - e_shentsize must equal sizeof(Elf_Shdr).
	 */
	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
	    || hdr->e_type != ET_REL
	    || !elf_check_arch(hdr)
	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/*
	 * Length check: e_shoff is where the section header table starts and
	 * e_shnum * sizeof(Elf_Shdr) is its size; it must fit in the file.
	 */
	if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/* Remember the copy in info. */
	info->hdr = hdr;
	info->len = len;
	return 0;

free_hdr:
	vfree(hdr);
	return err;
}
2.2 第2步解析模块,并把含SHF_ALLOC的section加载到最终的内存中
init_module
--> load_module
--> layout_and_allocate
static struct module *layout_and_allocate(struct load_info *info)
{
/* Module within temporary copy. */
struct module *mod;
Elf_Shdr *pcpusec;
mod = setup_load_info(info); //1.更新每一个section的实际地址,并返回gnu.linkonce.this_module这个section的地址
check_modinfo(mod, info); //2.检查mofinfo这个section中的内容
//这个函数是空的
module_frob_arch_sections(info->hdr, info->sechdrs, info->secstrings, mod);
//没有pcpu这个section, pcpupcpusec->sh_size=0
pcpusec = &info->sechdrs[info->index.pcpu];
if (pcpusec->sh_size) {
/* We have a special allocation for this section. */
err = percpu_modalloc(mod, pcpusec->sh_size, pcpusec->sh_addralign);
if (err)
goto out;
pcpusec->sh_flags &= ~(unsigned
long)SHF_ALLOC;
}
//3.这儿的len就是hello.ko的size=62173
layout_sections(mod, info);
//为符号名称字符串表分配内存
info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size)* sizeof(long), GFP_KERNEL);
//4. 如果配置了选项CONFIG_KALLSYMS,就要把符号名称的字符串表加载到内存
layout_symtab(mod, info);
//5. 把hello.ko中需要加载到内存的加载到内存
err = move_module(mod, info);
//.gnu.linkonce.this_module的section的地址也要相应改变
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
kmemleak_load_module(mod, info);
return mod;
}
2.2.1 HDR的第一次修改
init_module
--> load_module
--> layout_and_allocate
--> setup_load_info
更新每一个section的实际地址,并返回gnu.linkonce.this_module这个section的地址
/*
 * Fill in the convenience fields of info (section header table, section
 * name strings, symbol/string table indexes) and locate the module
 * structure inside the temporary copy of the image.
 *
 * Returns the address of the .gnu.linkonce.this_module section, or an
 * ERR_PTR() if the section headers are invalid, the object contains no
 * module section or symbol table, or the module structure version check
 * fails.
 */
static struct module *setup_load_info(struct load_info *info)
{
	unsigned int i;
	int err;
	struct module *mod;

	/* Base address of the section header table. */
	info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;
	/* Address of the .shstrtab section (section name strings). */
	info->secstrings = (void *)info->hdr
		+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;

	/* Point every sh_addr at the section's place in the temporary copy. */
	err = rewrite_section_headers(info);
	if (err)
		return ERR_PTR(err);

	/* Find the symbol table and the string table it links to. */
	for (i = 1; i < info->hdr->e_shnum; i++) {
		if (info->sechdrs[i].sh_type == SHT_SYMTAB) {
			info->index.sym = i;
			info->index.str = info->sechdrs[i].sh_link;
			info->strtab = (char *)info->hdr
				+ info->sechdrs[info->index.str].sh_offset;
			break;
		}
	}

	/*
	 * Find the index of the .gnu.linkonce.this_module section.  If it
	 * is missing this object is not a module: fail.
	 */
	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
	if (!info->index.mod)
		return ERR_PTR(-ENOEXEC);

	/* Address of the module structure inside the temporary copy. */
	mod = (void *)info->sechdrs[info->index.mod].sh_addr;

	/* A stripped hello.ko has no symbol table and cannot be loaded. */
	if (info->index.sym == 0)
		return ERR_PTR(-ENOEXEC);

	info->index.pcpu = find_pcpusec(info);

	if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
		return ERR_PTR(-ENOEXEC);

	return mod;
}
init_module
--> load_module
--> layout_and_allocate
--> setup_load_info
--> rewrite_section_headers
因为hello.ko己经被读取到了内核空间的某一个内存处,要想访问每个section的地址需要通过sh_addr
但是现在sh_addr这个值己经不准了,所以需要用新地址来更新一下
/*
 * Make every section header's sh_addr point at the section's data inside
 * the in-kernel copy of the image (info->hdr + sh_offset), and clear
 * SHF_ALLOC on the sections that must not end up in the final module.
 *
 * Returns 0, or -ENOEXEC if a section with file contents extends past
 * the end of the image.
 */
static int rewrite_section_headers(struct load_info *info)
{
	Elf_Shdr *sec;
	unsigned int idx;

	info->sechdrs[0].sh_addr = 0;

	for (idx = 1; idx < info->hdr->e_shnum; idx++) {
		sec = &info->sechdrs[idx];

		/* SHT_NOBITS sections are exempt from the bounds check. */
		if (sec->sh_type != SHT_NOBITS) {
			if (info->len < sec->sh_offset + sec->sh_size)
				return -ENOEXEC;
		}

		/* Record where this section lives in the temporary copy. */
		sec->sh_addr = (size_t)info->hdr + sec->sh_offset;

#ifndef CONFIG_MODULE_UNLOAD
		/*
		 * Built only when CONFIG_MODULE_UNLOAD is NOT set: sections
		 * whose name starts with ".exit" are then not loaded.
		 */
		if (strstarts(info->secstrings + sec->sh_name, ".exit"))
			sec->sh_flags &= ~(unsigned long)SHF_ALLOC;
#endif
	}

	/* Look up both indexes first, then drop SHF_ALLOC on both sections. */
	info->index.vers = find_sec(info, "__versions");
	info->index.info = find_sec(info, ".modinfo");
	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;

	return 0;
}
init_module
--> load_module
--> layout_and_allocate
--> setup_load_info
--> find_sec
//遍历整个section_header_table找到name与参数相同的section,返回这个section在section_header_table中的索引
static unsigned int find_sec(const struct
load_info *info, const char *name)
{
//info->secstrings是.shstrtab的地址,
shdr->sh_name是每一个section的名字在.shstrtab中的偏移
for (i = 1; i < info->hdr->e_shnum; i++) {
Elf_Shdr *shdr = &info->sechdrs[i];
if ((shdr->sh_flags & SHF_ALLOC) && strcmp(info->secstrings + shdr->sh_name, name) == 0)
return i;
}
return 0;
}
2.2.2 检查section中的版本号
init_module
--> load_module
--> layout_and_allocate
--> check_modinfo
检查modinfo这个section中的内容
/*
 * Validate the .modinfo section: version magic, staging marker, license.
 *
 * Returns 0 on success, the error from try_to_force_load() when the
 * module carries no vermagic, or -ENOEXEC when its vermagic does not
 * match the running kernel's.
 */
static int check_modinfo(struct module *mod, struct load_info *info)
{
	const char *modmagic = get_modinfo(info, "vermagic");
	int err;

	/*
	 * The kernel version the module was built for must match the
	 * running kernel (hello.ko carries vermagic=3.0.1).
	 */
	if (!modmagic) {
		err = try_to_force_load(mod, "bad vermagic");
		if (err)
			return err;
	} else if (!same_magic(modmagic, vermagic, info->index.vers)) {
		printk(KERN_ERR "%s: version magic '%s' should be '%s'\n",
		       mod->name, modmagic, vermagic);
		return -ENOEXEC;
	}

	/* Modules tagged "staging" taint the kernel with TAINT_CRAP. */
	if (get_modinfo(info, "staging")) {
		add_taint_module(mod, TAINT_CRAP);
		printk(KERN_WARNING "%s: module is from the staging directory,"
		       " the quality is unknown, you have been warned.\n",
		       mod->name);
	}

	/* Record the module's license string (the article: checks for GPL). */
	set_license(mod, get_modinfo(info, "license"));

	return 0;
}
init_module
--> load_module
--> layout_and_allocate
--> check_modinfo
--> get_modinfo
/*
 * Look up "tag=value" in the .modinfo section.
 *
 * Returns a pointer to the first byte after the '=' of the entry whose
 * key equals tag, or NULL if there is no such entry.
 */
static char *get_modinfo(struct load_info *info, const char *tag)
{
	char *p;
	unsigned int taglen = strlen(tag);
	/*
	 * info->index.info was already set in rewrite_section_headers() by
	 *     info->index.info = find_sec(info, ".modinfo");
	 * It must not be looked up again here: SHF_ALLOC has since been
	 * cleared on .modinfo, and find_sec() only matches SHF_ALLOC
	 * sections.
	 */
	Elf_Shdr *infosec = &info->sechdrs[info->index.info];
	unsigned long size = infosec->sh_size;	/* length of .modinfo */

	/* Every entry in .modinfo is terminated by '\0'. */
	for (p = (char *)infosec->sh_addr; p; p = next_string(p, &size)) {
		if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=')
			return p + taglen + 1;	/* the value after '=' */
	}
	return NULL;
}
下面是用objdump打印的modinfo
从上图可以看出每一个字段的结束都是 00
2.2.3 划分为两部分CORE 与 INIT
init_module
--> load_module
--> layout_and_allocate
--> layout_sections
/*
 * Assign every loadable section an offset inside either the CORE or the
 * INIT region of the module and accumulate the region sizes in
 * mod->core_size / mod->init_size.  The offset is stored in the
 * section's sh_entsize; INIT offsets are additionally OR'ed with
 * INIT_OFFSET_MASK.
 */
static void layout_sections(struct module *mod, struct
load_info *info)
{
/*
 * One row per pass: { flags that must all be set, flags that must all be
 * clear }.  Row 0 selects executable sections, row 1 non-writable ones,
 * row 2 writable ones, row 3 ARCH_SHF_SMALL ones.
 */
static unsigned long const masks[][2] = {
{ SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL },
{ SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL },
{ SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL },
{ ARCH_SHF_SMALL | SHF_ALLOC, 0 }
};
unsigned int m, i;
/* ~0UL in sh_entsize marks a section that has not been placed yet. */
for (i = 0; i < info->hdr->e_shnum; i++)
info->sechdrs[i].sh_entsize = ~0UL;
/* The sections are split into two parts: CORE and INIT. */
/* a. Part 1, CORE: sections whose flags include SHF_ALLOC. */
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
for (i = 0; i < info->hdr->e_shnum; ++i) {
Elf_Shdr *s = &info->sechdrs[i];
const char *sname = info->secstrings + s->sh_name;
/* Sections with SHF_ALLOC have to be loaded into the final memory. */
/* Those whose name does not start with ".init" go to the CORE part. */
/* Skip: wrong flags for this pass, already placed, or an .init section. */
if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL || strstarts(sname, ".init"))
continue;
/* presumably get_offset() returns this section's offset and grows core_size - TODO confirm */
s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
}
switch (m) {
case 0: /* end of the executable (text) sections */
mod->core_size = debug_align(mod->core_size);
mod->core_text_size = mod->core_size;
break;
case 1: /* end of the read-only sections */
mod->core_size = debug_align(mod->core_size);
mod->core_ro_size = mod->core_size;
break;
case 3: /* end of all sections */
mod->core_size = debug_align(mod->core_size);
break;
}
}
/* b. Part 2, INIT: same passes, but only sections named ".init*". */
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
for (i = 0; i < info->hdr->e_shnum; ++i) {
Elf_Shdr *s = &info->sechdrs[i];
const char *sname = info->secstrings + s->sh_name;
/* Sections with SHF_ALLOC have to be loaded into the final memory. */
/* Those whose name starts with ".init" go to the INIT part. */
if ((s->sh_flags & masks[m][0]) != masks[m][0]
|| (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL
|| !strstarts(sname, ".init"))
continue;
/* INIT_OFFSET_MASK tags the offset as belonging to the INIT region. */
s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) | INIT_OFFSET_MASK);
}
switch (m) {
case 0:
mod->init_size = debug_align(mod->init_size);
mod->init_text_size = mod->init_size;
break;
case 1:
mod->init_size = debug_align(mod->init_size);
mod->init_ro_size = mod->init_size;
break;
case 3:
mod->init_size = debug_align(mod->init_size);
break;
}
}
}
注: 这儿为什么要区分init与core呢? 因为init部分的内存在使用完之后,马上就会被释放,而core部分的内存则会一直存在于内存中
2.2.4 加载符号名称字符串表
当配置了内核选项CONFIG_KALLSYMS时,就需要把符号名称字符串表加载到内存中去
因为这个section的标志位中不含 SHF_ALLOC,所以需要单独加载
/*
 * Reserve room for the symbol table and its string table.  Both sections
 * are marked SHF_ALLOC and placed at the end of the INIT region; room for
 * the core symbols and for the name bytes they use is appended to the
 * CORE region (offsets kept in info->symoffs / info->stroffs).
 */
static void layout_symtab(struct module *mod, struct
load_info *info)
{
Elf_Shdr *symsect = info->sechdrs + info->index.sym;
Elf_Shdr *strsect = info->sechdrs + info->index.str;
const Elf_Sym *src;
unsigned int i, nsrc, ndst;
/* Put symbol section at end of init part of module. */
symsect->sh_flags |= SHF_ALLOC;
symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
info->index.sym) | INIT_OFFSET_MASK;
DEBUGP("\t%s\n", info->secstrings + symsect->sh_name);
src = (void *)info->hdr + symsect->sh_offset;
nsrc = symsect->sh_size / sizeof(*src);
/*
 * Count the core symbols in ndst (starting from 1) and set one bit in
 * info->strmap for every string table byte their names occupy.
 * NOTE(review): src starts at entry 0 while i starts at 1, so the last
 * symbol table entry is never examined - confirm against the code that
 * later copies the symbols.
 */
for (ndst = i = 1; i < nsrc; ++i, ++src)
if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
unsigned int j = src->st_name;
/* Stop at the name's NUL byte or at a byte that is already marked. */
while (!__test_and_set_bit(j, info->strmap)
&& info->strtab[j])
++j;
++ndst;
}
/* Append room for core symbols at end of
core part. */
info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
/* Put string table section at end of
init part of module. */
strsect->sh_flags |= SHF_ALLOC;
strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
info->index.str) | INIT_OFFSET_MASK;
DEBUGP("\t%s\n", info->secstrings + strsect->sh_name);
/* Append room for core symbols' strings
at end of core part. */
info->stroffs = mod->core_size;
/* Byte 0 of the string table is always kept. */
__set_bit(0, info->strmap);
/* One byte of core space per marked string table byte. */
mod->core_size += bitmap_weight(info->strmap, strsect->sh_size);
}
2.2.5 HDR视图的第二次转移
为core与init区分配新的内存,并把core与init部分copy到新内存中
static int move_module(struct module *mod, struct
load_info *info)
{
int i;
void *ptr;
//对core区分配内存
ptr = module_alloc_update_bounds(mod->core_size);
kmemleak_not_leak(ptr); //检查内存泄漏
memset(ptr, 0, mod->core_size); //将对core区内存清0
mod->module_core = ptr; //将core内存指针记录在变量module_core中
//对init区分配内存
ptr = module_alloc_update_bounds(mod->init_size);
kmemleak_ignore(ptr); //检查内存泄漏
memset(ptr, 0, mod->init_size); //将对init区内存清0
mod->module_init = ptr;
//将init内存指针记录在变量module_init中
//将core与init区copy到新分配的内存中去
for (i = 0; i < info->hdr->e_shnum; i++) {
void *dest;
Elf_Shdr *shdr = &info->sechdrs[i];
if (!(shdr->sh_flags & SHF_ALLOC))
continue;
if (shdr->sh_entsize & INIT_OFFSET_MASK)
dest = mod->module_init + (shdr->sh_entsize & ~INIT_OFFSET_MASK);
else
dest = mod->module_core + shdr->sh_entsize;
if (shdr->sh_type != SHT_NOBITS)
memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
//sectin再次移动,那么section_header_table中指向section的指针也要相应改变
shdr->sh_addr = (unsigned
long)dest;
}
return 0;
}
附:
1. 关于c中的转义
类似于 \t \b
#include <stdio.h>
#include <stdlib.h>
/* Demo: print the value of the octal character escape '\177' in hex. */
int main ( int argc, char *argv[] )
{
/* '\177' is an octal escape; the article reports the output 0x7f. */
printf("0x%x\n", '\177');
return EXIT_SUCCESS;
}
gcc -o test test.c
结果是: '\177'
= 0x7f
\OOO : 8进制
\xXX : 16进制
2. 关于gcc中的条件表达式
表达式a?b:c, 省略了中间的b,是什么意思呢?
#include <stdio.h>
#include <stdlib.h>
/* Demo: the gcc conditional with the middle operand omitted. */
int main ( int argc, char *argv[] )
{
int a=2;
/* a?:1 is the gcc extension equivalent to a?a:1. */
printf("result=%d\n", a?:1);
return 0;
}
这是gcc的一个扩展: a?:c == a?a:c
参考文章:
http://gcc.gnu.org/onlinedocs/gcc/Conditionals.html#Conditionals
1. 下面是一个类似于insmod的程序
绝大多数代码是从busybox中的insmod.c中弄过来的,不过效果还是有的.
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <errno.h>
#include <sys/syscall.h>
#define INT_MAX ((int)(~0U>>1))
/*
 * Map a module file read-only into memory.
 *
 * filename:     path of the file to map.
 * image_size_p: in:  largest file size the caller accepts;
 *               out: size of the mapping (written only on success).
 *
 * Returns the mapped image, or NULL if the file cannot be opened or
 * stat'ed, is larger than *image_size_p, or cannot be mapped.  The caller
 * releases the image with munmap(image, *image_size_p).
 */
void* try_to_mmap_module(const char *filename, size_t *image_size_p)
{
	void *image = NULL;
	struct stat st;
	int fd;

	fd = open(filename, O_RDONLY);
	if (fd < 0)
		return NULL;

	/* Without a successful fstat() the size read below would be garbage. */
	if (fstat(fd, &st) != 0)
		goto out;

	/*
	 * st.st_size is a signed off_t and may be wider than size_t, so
	 * range-check it before narrowing it for mmap().
	 */
	if (st.st_size >= 0
	    && (unsigned long long)st.st_size <= (unsigned long long)*image_size_p) {
		size_t image_size = (size_t)st.st_size;

		image = mmap(NULL, image_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (image == MAP_FAILED)
			image = NULL;
		else
			*image_size_p = image_size;	/* Success. Report the size */
	}
out:
	/* The descriptor is no longer needed once the mapping attempt is done. */
	close(fd);
	return image;
}
/*
 * Minimal insmod: map the module file named by argv[1] and hand the image
 * to the kernel through the init_module system call.  No module
 * parameters are passed (the option string is empty).
 *
 * Returns 0 on success and -1 on any failure.
 */
int main(int argc, char **argv)
{
	const char *filename;
	size_t image_size;
	char *image;
	long rc;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <module.ko>\n",
			argc > 0 ? argv[0] : "inm");
		return -1;
	}
	filename = argv[1];

	/* Largest file size we are willing to map. */
	image_size = INT_MAX - 4095;
	image = try_to_mmap_module(filename, &image_size);
	if (image == NULL) {
		printf("mmap error\n");
		return -1;
	}

	/*
	 * Load the relocatable module image into kernel space and run the
	 * module's init function.  The call goes through syscall(2) instead
	 * of relying on an undeclared init_module() symbol.
	 */
	printf("next init_module\n");
	rc = syscall(__NR_init_module, image, image_size, "");
	if (rc != 0) {
		/* Save errno first: the calls below may overwrite it. */
		int saved_errno = errno;

		printf("init_module failed %d:%s\n", saved_errno,
		       strerror(saved_errno));
		munmap(image, image_size);
		return -1;
	}
	printf("init module sucess\n");
	munmap(image, image_size);
	return 0;
}
Makefile
CC=arm-none-linux-gnueabi-gcc
# Build the inm loader from inm.c with debug info, using the compiler in CC.
inm: inm.c
	$(CC) -g -o $@ $<
2. 实验结果如下:
root@OK6410:/work/hello# ../inm ./hello.ko
init module sucess
root@OK6410:/work/hello# lsmod
hello 2045 0 - Live 0xbf004000
二. linux内核的模块装载过程
为何只调用一个init_module就会把模块装载到内核呢?下面就分析一下它的过程
在kernel/module.c中
/*
 * init_module system call: load the module image at umod (len bytes) with
 * the argument string uargs, run its init function and mark it live.
 *
 * Returns 0 on success, -EPERM if the caller lacks CAP_SYS_MODULE or
 * module loading is disabled, the error carried by load_module()'s result,
 * or the negative value returned by the module's init function.
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
		unsigned long, len, const char __user *, uargs)
{
	struct module *mod;
	int ret = 0;

	/* Permission check. */
	if (!capable(CAP_SYS_MODULE) || modules_disabled)
		return -EPERM;

	/* Most of the work happens in load_module(). */
	mod = load_module(umod, len, uargs);
	/* Failure comes back as an ERR_PTR(); it must not be dereferenced. */
	if (IS_ERR(mod))
		return PTR_ERR(mod);

	/* Tell listeners that a new module is coming. */
	blocking_notifier_call_chain(&module_notify_list,
				     MODULE_STATE_COMING, mod);

	set_section_ro_nx(mod->module_core, mod->core_text_size,
			  mod->core_ro_size, mod->core_size);
	set_section_ro_nx(mod->module_init, mod->init_text_size,
			  mod->init_ro_size, mod->init_size);

	do_mod_ctors(mod);
	/* Run the module's init function, if it has one. */
	if (mod->init != NULL)
		ret = do_one_initcall(mod->init);
	if (ret < 0) {
		/* Init failed: announce that the module is going and free it. */
		mod->state = MODULE_STATE_GOING;
		synchronize_sched();
		module_put(mod);
		blocking_notifier_call_chain(&module_notify_list,
					     MODULE_STATE_GOING, mod);
		free_module(mod);
		wake_up(&module_wq);
		return ret;
	}
	/* A positive return value is not a failure; just dump the stack. */
	if (ret > 0)
		dump_stack();

	/* The module is now live; wake anyone waiting on module_wq. */
	mod->state = MODULE_STATE_LIVE;
	wake_up(&module_wq);
	blocking_notifier_call_chain(&module_notify_list,
				     MODULE_STATE_LIVE, mod);
	async_synchronize_full();

	mutex_lock(&module_mutex);
	module_put(mod);
	trim_init_extable(mod);
#ifdef CONFIG_KALLSYMS
	/* Switch the symbol/string tables over to the core copies. */
	mod->num_symtab = mod->core_num_syms;
	mod->symtab = mod->core_symtab;
	mod->strtab = mod->core_strtab;
#endif
	/* The init region is no longer needed: free it and clear its bookkeeping. */
	unset_module_init_ro_nx(mod);
	module_free(mod, mod->module_init);
	mod->module_init = NULL;
	mod->init_size = 0;
	mod->init_ro_size = 0;
	mod->init_text_size = 0;
	mutex_unlock(&module_mutex);

	return 0;
}
注: SYSCALL_DEFINE3(init_module, void __user *, umod, unsigned long, len, const char __user
*, uargs)
其中SYSCALL_DEFINE3是定义在include/linux/syscalls.h中
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
#define SYSCALL_DEFINEx(x, sname, ...) __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
#define __SYSCALL_DEFINEx(x, name, ...) asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))
asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))
还有一个宏 __SC_DECL*也是定义在include/linux/syscalls.h中,
它的作用是: 把参数列表中成对出现的(类型, 参数名)之间的逗号去掉,拼成"类型 参数名"的形式; 这几个宏是逐级嵌套展开的(__SC_DECL3 --> __SC_DECL2 --> __SC_DECL1).
#define __SC_DECL1(t1, a1) t1 a1
#define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
#define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__)
所以最终展开如下:
asmlinkage long sys_init_module(void __user* umod, unsigned long len, const char __user* uargs)
为什么是SYSCALL_DEFINE3? 后面的3是代表有3个参数.
2. 在load_module中完成大部分操作
init_module
--> load_module
/*
 * Allocate and load the module: copy the image in from user space, lay it
 * out in its final memory, resolve symbols and relocations, then link the
 * module into the module list and sysfs.
 *
 * Returns the loaded module, or an ERR_PTR() on failure.  On failure the
 * steps completed so far are undone in reverse order via the labels at
 * the end of the function.
 */
static struct module *load_module(void __user *umod, unsigned long len,
				  const char __user *uargs)
{
	struct load_info info = { NULL, };
	struct module *mod;
	long err;

	/* 1. Allocate a buffer the size of hello.ko, copy it in and validate it. */
	err = copy_and_check(&info, umod, len, uargs);
	if (err)
		return ERR_PTR(err);

	/* 2. Parse hello.ko and load the SHF_ALLOC data into new memory. */
	mod = layout_and_allocate(&info);
	if (IS_ERR(mod)) {
		err = PTR_ERR(mod);
		goto free_copy;
	}

	/* 3. */
	err = module_unload_init(mod);
	if (err)
		goto free_module;

	/* 4. */
	find_module_sections(mod, &info);

	/* 5. */
	err = check_module_license_and_versions(mod);
	if (err)
		goto free_unload;

	/* 6. */
	setup_modinfo(mod, &info);

	/* 7. */
	err = simplify_symbols(mod, &info);
	if (err < 0)
		goto free_modinfo;

	/* 8. */
	err = apply_relocations(mod, &info);
	if (err < 0)
		goto free_modinfo;

	/* 9. */
	err = post_relocation(mod, &info);
	if (err < 0)
		goto free_modinfo;

	/* 10. */
	flush_module_icache(mod);

	/* Copy the argument string in from user space. */
	mod->args = strndup_user(uargs, ~0UL >> 1);
	if (IS_ERR(mod->args)) {
		err = PTR_ERR(mod->args);
		goto free_arch_cleanup;
	}

	mod->state = MODULE_STATE_COMING;

	/* The module list is modified under module_mutex. */
	mutex_lock(&module_mutex);
	if (find_module(mod->name)) {
		/* A module with this name is already loaded. */
		err = -EEXIST;
		goto unlock;
	}

	if (!mod->taints || mod->taints == (1U<<TAINT_CRAP))
		dynamic_debug_setup(info.debug, info.num_debug);

	err = verify_export_symbols(mod);
	if (err < 0)
		goto ddebug;

	module_bug_finalize(info.hdr, info.sechdrs, mod);
	list_add_rcu(&mod->list, &modules);
	mutex_unlock(&module_mutex);

	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
	if (err < 0)
		goto unlink;

	err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
	if (err < 0)
		goto unlink;

	/* Drop the temporary copy of the image and the string bitmap. */
	kfree(info.strmap);
	free_copy(&info);

	trace_module_load(mod);
	return mod;

 unlink:
	mutex_lock(&module_mutex);
	list_del_rcu(&mod->list);
	module_bug_cleanup(mod);
 ddebug:
	if (!mod->taints || mod->taints == (1U<<TAINT_CRAP))
		dynamic_debug_remove(info.debug);
 unlock:
	mutex_unlock(&module_mutex);
	synchronize_sched();
	kfree(mod->args);
 free_arch_cleanup:
	module_arch_cleanup(mod);
 free_modinfo:
	free_modinfo(mod);
 free_unload:
	module_unload_free(mod);
 free_module:
	module_deallocate(mod, &info);
 free_copy:
	free_copy(&info);
	return ERR_PTR(err);
}
下面以hello.ko的加载为例分析一下:
0000000: 7f45 4c46 0101 0100 0000 0000 0000 0000 //e_ident
0000010: 0100 2800 0100 0000 0000 0000 0000 0000
type mach version entry phoff
0000020: 2cb7 0000 0000 0005 3400 0000 0000 2800
shoff flags ehsize phentsize phnum shentsize
0000030: 2400 2100
shnum shstrndx
e_shoff = 0xb72c = 46892 --> section header table在文件中的偏移是46892
e_shentsize = 0x28 = 40 --> section header table 每个entry是40Byte
e_shnum = 0x24 = 36 --> section header table中共有36个entry
e_shstrndx = 0x21 = 33 --> 说明.shstrtab这个section在section_header_table中的第33项
2.1 第1步申请内存,并把ko数据从用户区copy到内核区
root@OK6410:/work/hello# ls -l hello.ko
-rw-rw-r-- 1 1000 1000 62173 Aug 6 13:58 hello.ko
init_module
--> load_module
--> copy_and_check
/*
 * Copy the module image from user space into a kernel buffer and run
 * basic ELF sanity checks on it.
 *
 * On success stores the buffer and its length in info->hdr / info->len
 * and returns 0.  Returns -ENOEXEC for a truncated or unsuitable ELF
 * file, -ENOMEM if the buffer cannot be allocated, -EFAULT if the copy
 * from user space fails.  The buffer is freed on every error path.
 */
static int copy_and_check(struct load_info *info,
			  const void __user *umod, unsigned long len,
			  const char __user *uargs)
{
	int err;
	Elf_Ehdr *hdr;

	if (len < sizeof(*hdr))
		return -ENOEXEC;

	/* Allocate a buffer as large as the file (len is 62173 for hello.ko). */
	if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
		return -ENOMEM;

	/* Copy the whole of hello.ko from user space into kernel space. */
	if (copy_from_user(hdr, umod, len) != 0) {
		err = -EFAULT;
		goto free_hdr;
	}

	/*
	 * - e_ident must start with the four magic bytes "\177ELF";
	 * - e_type must be ET_REL (1): a relocatable file;
	 * - e_machine must match this architecture (0x28 means ARM);
	 * - e_shentsize must equal sizeof(Elf_Shdr).
	 */
	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
	    || hdr->e_type != ET_REL
	    || !elf_check_arch(hdr)
	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/*
	 * Length check: e_shoff is where the section header table starts and
	 * e_shnum * sizeof(Elf_Shdr) is its size; it must fit in the file.
	 */
	if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/* Remember the copy in info. */
	info->hdr = hdr;
	info->len = len;
	return 0;

free_hdr:
	vfree(hdr);
	return err;
}
2.2 第2步解析模块,并把含SHF_ALLOC的section加载到最终的内存中
init_module
--> load_module
--> layout_and_allocate
static struct module *layout_and_allocate(struct load_info *info)
{
/* Module within temporary copy. */
struct module *mod;
Elf_Shdr *pcpusec;
mod = setup_load_info(info); //1.更新每一个section的实际地址,并返回gnu.linkonce.this_module这个section的地址
check_modinfo(mod, info); //2.检查mofinfo这个section中的内容
//这个函数是空的
module_frob_arch_sections(info->hdr, info->sechdrs, info->secstrings, mod);
//没有pcpu这个section, pcpupcpusec->sh_size=0
pcpusec = &info->sechdrs[info->index.pcpu];
if (pcpusec->sh_size) {
/* We have a special allocation for this section. */
err = percpu_modalloc(mod, pcpusec->sh_size, pcpusec->sh_addralign);
if (err)
goto out;
pcpusec->sh_flags &= ~(unsigned
long)SHF_ALLOC;
}
//3.这儿的len就是hello.ko的size=62173
layout_sections(mod, info);
//为符号名称字符串表分配内存
info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size)* sizeof(long), GFP_KERNEL);
//4. 如果配置了选项CONFIG_KALLSYMS,就要把符号名称的字符串表加载到内存
layout_symtab(mod, info);
//5. 把hello.ko中需要加载到内存的加载到内存
err = move_module(mod, info);
//.gnu.linkonce.this_module的section的地址也要相应改变
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
kmemleak_load_module(mod, info);
return mod;
}
2.2.1 HDR的第一次修改
init_module
--> load_module
--> layout_and_allocate
--> setup_load_info
更新每一个section的实际地址,并返回gnu.linkonce.this_module这个section的地址
/*
 * Fill in the convenience fields of info (section header table, section
 * name strings, symbol/string table indexes) and locate the module
 * structure inside the temporary copy of the image.
 *
 * Returns the address of the .gnu.linkonce.this_module section, or an
 * ERR_PTR() if the section headers are invalid, the object contains no
 * module section or symbol table, or the module structure version check
 * fails.
 */
static struct module *setup_load_info(struct load_info *info)
{
	unsigned int i;
	int err;
	struct module *mod;

	/* Base address of the section header table. */
	info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;
	/* Address of the .shstrtab section (section name strings). */
	info->secstrings = (void *)info->hdr
		+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;

	/* Point every sh_addr at the section's place in the temporary copy. */
	err = rewrite_section_headers(info);
	if (err)
		return ERR_PTR(err);

	/* Find the symbol table and the string table it links to. */
	for (i = 1; i < info->hdr->e_shnum; i++) {
		if (info->sechdrs[i].sh_type == SHT_SYMTAB) {
			info->index.sym = i;
			info->index.str = info->sechdrs[i].sh_link;
			info->strtab = (char *)info->hdr
				+ info->sechdrs[info->index.str].sh_offset;
			break;
		}
	}

	/*
	 * Find the index of the .gnu.linkonce.this_module section.  If it
	 * is missing this object is not a module: fail.
	 */
	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
	if (!info->index.mod)
		return ERR_PTR(-ENOEXEC);

	/* Address of the module structure inside the temporary copy. */
	mod = (void *)info->sechdrs[info->index.mod].sh_addr;

	/* A stripped hello.ko has no symbol table and cannot be loaded. */
	if (info->index.sym == 0)
		return ERR_PTR(-ENOEXEC);

	info->index.pcpu = find_pcpusec(info);

	if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
		return ERR_PTR(-ENOEXEC);

	return mod;
}
init_module
--> load_module
--> layout_and_allocate
--> setup_load_info
--> rewrite_section_headers
因为hello.ko己经被读取到了内核空间的某一个内存处,要想访问每个section的地址需要通过sh_addr
但是现在sh_addr这个值己经不准了,所以需要用新地址来更新一下
/*
 * Make every section header's sh_addr point at the section's data inside
 * the in-kernel copy of the image (info->hdr + sh_offset), and clear
 * SHF_ALLOC on the sections that must not end up in the final module.
 *
 * Returns 0, or -ENOEXEC if a section with file contents extends past
 * the end of the image.
 */
static int rewrite_section_headers(struct load_info *info)
{
	Elf_Shdr *sec;
	unsigned int idx;

	info->sechdrs[0].sh_addr = 0;

	for (idx = 1; idx < info->hdr->e_shnum; idx++) {
		sec = &info->sechdrs[idx];

		/* SHT_NOBITS sections are exempt from the bounds check. */
		if (sec->sh_type != SHT_NOBITS) {
			if (info->len < sec->sh_offset + sec->sh_size)
				return -ENOEXEC;
		}

		/* Record where this section lives in the temporary copy. */
		sec->sh_addr = (size_t)info->hdr + sec->sh_offset;

#ifndef CONFIG_MODULE_UNLOAD
		/*
		 * Built only when CONFIG_MODULE_UNLOAD is NOT set: sections
		 * whose name starts with ".exit" are then not loaded.
		 */
		if (strstarts(info->secstrings + sec->sh_name, ".exit"))
			sec->sh_flags &= ~(unsigned long)SHF_ALLOC;
#endif
	}

	/* Look up both indexes first, then drop SHF_ALLOC on both sections. */
	info->index.vers = find_sec(info, "__versions");
	info->index.info = find_sec(info, ".modinfo");
	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;

	return 0;
}
init_module
--> load_module
--> layout_and_allocate
--> setup_load_info
--> find_sec
//遍历整个section_header_table找到name与参数相同的section,返回这个section在section_header_table中的索引
static unsigned int find_sec(const struct
load_info *info, const char *name)
{
//info->secstrings是.shstrtab的地址,
shdr->sh_name是每一个section的名字在.shstrtab中的偏移
for (i = 1; i < info->hdr->e_shnum; i++) {
Elf_Shdr *shdr = &info->sechdrs[i];
if ((shdr->sh_flags & SHF_ALLOC) && strcmp(info->secstrings + shdr->sh_name, name) == 0)
return i;
}
return 0;
}
2.2.2 检查section中的版本号
init_module
--> load_module
--> layout_and_allocate
--> check_modinfo
检查modinfo这个section中的内容
/*
 * Validate the .modinfo section: version magic, staging marker, license.
 *
 * Returns 0 on success, the error from try_to_force_load() when the
 * module carries no vermagic, or -ENOEXEC when its vermagic does not
 * match the running kernel's.
 */
static int check_modinfo(struct module *mod, struct load_info *info)
{
	const char *modmagic = get_modinfo(info, "vermagic");
	int err;

	/*
	 * The kernel version the module was built for must match the
	 * running kernel (hello.ko carries vermagic=3.0.1).
	 */
	if (!modmagic) {
		err = try_to_force_load(mod, "bad vermagic");
		if (err)
			return err;
	} else if (!same_magic(modmagic, vermagic, info->index.vers)) {
		printk(KERN_ERR "%s: version magic '%s' should be '%s'\n",
		       mod->name, modmagic, vermagic);
		return -ENOEXEC;
	}

	/* Modules tagged "staging" taint the kernel with TAINT_CRAP. */
	if (get_modinfo(info, "staging")) {
		add_taint_module(mod, TAINT_CRAP);
		printk(KERN_WARNING "%s: module is from the staging directory,"
		       " the quality is unknown, you have been warned.\n",
		       mod->name);
	}

	/* Record the module's license string (the article: checks for GPL). */
	set_license(mod, get_modinfo(info, "license"));

	return 0;
}
init_module
--> load_module
--> layout_and_allocate
--> check_modinfo
--> get_modinfo
/*
 * Look up "tag=value" in the .modinfo section.
 *
 * Returns a pointer to the first byte after the '=' of the entry whose
 * key equals tag, or NULL if there is no such entry.
 */
static char *get_modinfo(struct load_info *info, const char *tag)
{
	char *p;
	unsigned int taglen = strlen(tag);
	/*
	 * info->index.info was already set in rewrite_section_headers() by
	 *     info->index.info = find_sec(info, ".modinfo");
	 * It must not be looked up again here: SHF_ALLOC has since been
	 * cleared on .modinfo, and find_sec() only matches SHF_ALLOC
	 * sections.
	 */
	Elf_Shdr *infosec = &info->sechdrs[info->index.info];
	unsigned long size = infosec->sh_size;	/* length of .modinfo */

	/* Every entry in .modinfo is terminated by '\0'. */
	for (p = (char *)infosec->sh_addr; p; p = next_string(p, &size)) {
		if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=')
			return p + taglen + 1;	/* the value after '=' */
	}
	return NULL;
}
下面是用objdump打印的modinfo
从上图可以看出每一个字段的结束都是 00
2.2.3 划分为两部分CORE 与 INIT
init_module
--> load_module
--> layout_and_allocate
--> layout_sections
/*
 * Assign every loadable section an offset inside either the CORE or the
 * INIT region of the module and accumulate the region sizes in
 * mod->core_size / mod->init_size.  The offset is stored in the
 * section's sh_entsize; INIT offsets are additionally OR'ed with
 * INIT_OFFSET_MASK.
 */
static void layout_sections(struct module *mod, struct
load_info *info)
{
/*
 * One row per pass: { flags that must all be set, flags that must all be
 * clear }.  Row 0 selects executable sections, row 1 non-writable ones,
 * row 2 writable ones, row 3 ARCH_SHF_SMALL ones.
 */
static unsigned long const masks[][2] = {
{ SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL },
{ SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL },
{ SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL },
{ ARCH_SHF_SMALL | SHF_ALLOC, 0 }
};
unsigned int m, i;
/* ~0UL in sh_entsize marks a section that has not been placed yet. */
for (i = 0; i < info->hdr->e_shnum; i++)
info->sechdrs[i].sh_entsize = ~0UL;
/* The sections are split into two parts: CORE and INIT. */
/* a. Part 1, CORE: sections whose flags include SHF_ALLOC. */
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
for (i = 0; i < info->hdr->e_shnum; ++i) {
Elf_Shdr *s = &info->sechdrs[i];
const char *sname = info->secstrings + s->sh_name;
/* Sections with SHF_ALLOC have to be loaded into the final memory. */
/* Those whose name does not start with ".init" go to the CORE part. */
/* Skip: wrong flags for this pass, already placed, or an .init section. */
if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL || strstarts(sname, ".init"))
continue;
/* presumably get_offset() returns this section's offset and grows core_size - TODO confirm */
s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
}
switch (m) {
case 0: /* end of the executable (text) sections */
mod->core_size = debug_align(mod->core_size);
mod->core_text_size = mod->core_size;
break;
case 1: /* end of the read-only sections */
mod->core_size = debug_align(mod->core_size);
mod->core_ro_size = mod->core_size;
break;
case 3: /* end of all sections */
mod->core_size = debug_align(mod->core_size);
break;
}
}
/* b. Part 2, INIT: same passes, but only sections named ".init*". */
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
for (i = 0; i < info->hdr->e_shnum; ++i) {
Elf_Shdr *s = &info->sechdrs[i];
const char *sname = info->secstrings + s->sh_name;
/* Sections with SHF_ALLOC have to be loaded into the final memory. */
/* Those whose name starts with ".init" go to the INIT part. */
if ((s->sh_flags & masks[m][0]) != masks[m][0]
|| (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL
|| !strstarts(sname, ".init"))
continue;
/* INIT_OFFSET_MASK tags the offset as belonging to the INIT region. */
s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) | INIT_OFFSET_MASK);
}
switch (m) {
case 0:
mod->init_size = debug_align(mod->init_size);
mod->init_text_size = mod->init_size;
break;
case 1:
mod->init_size = debug_align(mod->init_size);
mod->init_ro_size = mod->init_size;
break;
case 3:
mod->init_size = debug_align(mod->init_size);
break;
}
}
}
注: 这儿为什么要区分init与core呢? 因为init部分的内存在使用完之后,马上就会被释放,而core部分的内存则会一直存在于内存中
2.2.4 加载符号名称字符串表
当配置了内核选项CONFIG_KALLSYMS时,就需要把符号名称字符串表加载到内存中去
因为这个section的标志位中不含 SHF_ALLOC,所以需要单独加载
/*
 * Reserve room for the symbol table and its string table.  Both sections
 * are marked SHF_ALLOC and placed at the end of the INIT region; room for
 * the core symbols and for the name bytes they use is appended to the
 * CORE region (offsets kept in info->symoffs / info->stroffs).
 */
static void layout_symtab(struct module *mod, struct
load_info *info)
{
Elf_Shdr *symsect = info->sechdrs + info->index.sym;
Elf_Shdr *strsect = info->sechdrs + info->index.str;
const Elf_Sym *src;
unsigned int i, nsrc, ndst;
/* Put symbol section at end of init part of module. */
symsect->sh_flags |= SHF_ALLOC;
symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
info->index.sym) | INIT_OFFSET_MASK;
DEBUGP("\t%s\n", info->secstrings + symsect->sh_name);
src = (void *)info->hdr + symsect->sh_offset;
nsrc = symsect->sh_size / sizeof(*src);
/*
 * Count the core symbols in ndst (starting from 1) and set one bit in
 * info->strmap for every string table byte their names occupy.
 * NOTE(review): src starts at entry 0 while i starts at 1, so the last
 * symbol table entry is never examined - confirm against the code that
 * later copies the symbols.
 */
for (ndst = i = 1; i < nsrc; ++i, ++src)
if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
unsigned int j = src->st_name;
/* Stop at the name's NUL byte or at a byte that is already marked. */
while (!__test_and_set_bit(j, info->strmap)
&& info->strtab[j])
++j;
++ndst;
}
/* Append room for core symbols at end of
core part. */
info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
/* Put string table section at end of
init part of module. */
strsect->sh_flags |= SHF_ALLOC;
strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
info->index.str) | INIT_OFFSET_MASK;
DEBUGP("\t%s\n", info->secstrings + strsect->sh_name);
/* Append room for core symbols' strings
at end of core part. */
info->stroffs = mod->core_size;
/* Byte 0 of the string table is always kept. */
__set_bit(0, info->strmap);
/* One byte of core space per marked string table byte. */
mod->core_size += bitmap_weight(info->strmap, strsect->sh_size);
}
2.2.5 HDR视图的第二次转移
为core与init区分配新的内存,并把core与init部分copy到新内存中
static int move_module(struct module *mod, struct
load_info *info)
{
int i;
void *ptr;
//对core区分配内存
ptr = module_alloc_update_bounds(mod->core_size);
kmemleak_not_leak(ptr); //检查内存泄漏
memset(ptr, 0, mod->core_size); //将对core区内存清0
mod->module_core = ptr; //将core内存指针记录在变量module_core中
//对init区分配内存
ptr = module_alloc_update_bounds(mod->init_size);
kmemleak_ignore(ptr); //检查内存泄漏
memset(ptr, 0, mod->init_size); //将对init区内存清0
mod->module_init = ptr;
//将init内存指针记录在变量module_init中
//将core与init区copy到新分配的内存中去
for (i = 0; i < info->hdr->e_shnum; i++) {
void *dest;
Elf_Shdr *shdr = &info->sechdrs[i];
if (!(shdr->sh_flags & SHF_ALLOC))
continue;
if (shdr->sh_entsize & INIT_OFFSET_MASK)
dest = mod->module_init + (shdr->sh_entsize & ~INIT_OFFSET_MASK);
else
dest = mod->module_core + shdr->sh_entsize;
if (shdr->sh_type != SHT_NOBITS)
memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
//sectin再次移动,那么section_header_table中指向section的指针也要相应改变
shdr->sh_addr = (unsigned
long)dest;
}
return 0;
}
附:
1. 关于c中的转义
类似于 \t \b
#include <stdio.h>
#include <stdlib.h>
/* Demo: print the value of the octal character escape '\177' in hex. */
int main ( int argc, char *argv[] )
{
/* '\177' is an octal escape; the article reports the output 0x7f. */
printf("0x%x\n", '\177');
return EXIT_SUCCESS;
}
gcc -o test test.c
结果是: '\177'
= 0x7f
\OOO : 8进制
\xXX : 16进制
2. 关于gcc中的条件表达式
表达式a?b:c, 省略了中间的b,是什么意思呢?
#include <stdio.h>
#include <stdlib.h>
/* Demo: the gcc conditional with the middle operand omitted. */
int main ( int argc, char *argv[] )
{
int a=2;
/* a?:1 is the gcc extension equivalent to a?a:1. */
printf("result=%d\n", a?:1);
return 0;
}
这是gcc的一个扩展: a?:c == a?a:c
参考文章:
http://gcc.gnu.org/onlinedocs/gcc/Conditionals.html#Conditionals
相关文章推荐
- Linux内核---39.ELF 结构分析
- centos更新163源并升级内核
- Linux小知识
- linux常用命令
- 在linux使用make编译ArduPilot for Pixhawk/PX4 ArduPilot 编译环境搭建
- gcc: error: elf_i386: No such file or directory
- 橙子第一篇文章
- linux菜鸟学习(一)----ls,cd,pwd,mkdir,rmdir,rm
- centos yum升级php5.3.3到最5.6.3
- 【linux GDB】linux下GDB调试器_学习笔记_003
- centos安装php扩展mssql
- centos最常用命令及终端快捷键整理
- ELF文件分析
- CentOS系统内核升级
- Linux基础-----find和重定向命令
- linux下使用eclipse编辑,链接,使用动态库的学习笔记
- Linux下(软件)标准编译安装原理
- Linux安装包选择
- centos7之lamp环境搭建
- 嵌入式Linux开发环境