您的位置:首页 > 运维架构 > Linux

Linux内核---40.模块加载过程分析

2016-07-09 10:20 513 查看
一. 自己写的一个insmod小程序

1. 下面是一个类似于insmod的程序

绝大多数代码是从busybox中的insmod.c中弄过来的,不过效果还是有的.

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <sys/types.h>

#include <sys/stat.h>

#include <unistd.h>

#include <fcntl.h>

#include <sys/mman.h>

#include <errno.h>

#include <sys/syscall.h>

#define INT_MAX ((int)(~0U>>1))

/*
 * Map a module file read-only into memory.
 *
 * filename      path of the file to map.
 * image_size_p  in:  largest file size the caller accepts;
 *               out: actual size of the mapping (written only on success).
 *
 * Returns the mapping address, or NULL if the file cannot be opened or
 * stat'ed, is larger than *image_size_p, or cannot be mapped (this includes
 * an empty file, since mmap() rejects a zero length).
 * The caller releases the mapping with munmap(ptr, *image_size_p).
 */
void* try_to_mmap_module(const char *filename, size_t *image_size_p)
{
	void *image = NULL;
	struct stat st;
	int fd;

	if (filename == NULL || image_size_p == NULL)
		return NULL;

	fd = open(filename, O_RDONLY);
	if (fd < 0)
		return NULL;

	/* Without a successful fstat() st is uninitialised: bail out. */
	if (fstat(fd, &st) != 0)
		goto out;

	/*
	 * st.st_size is a signed off_t, so it cannot be handed to mmap() (or
	 * compared with a size_t) directly: reject negative values and compare
	 * in a type wide enough for both operands.
	 */
	if (st.st_size >= 0 &&
	    (unsigned long long)st.st_size <= (unsigned long long)*image_size_p) {
		size_t image_size = (size_t)st.st_size;

		image = mmap(NULL, image_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (image == MAP_FAILED) {
			image = NULL;
		} else {
			/* Success. Report the size */
			*image_size_p = image_size;
		}
	}

out:
	/* The mapping stays valid after the descriptor is closed. */
	close(fd);
	return image;
}

/*
 * Minimal insmod: map the module file named by argv[1] and pass the image to
 * the init_module system call with an empty parameter string (module
 * parameters are deliberately not supported, to keep the example small).
 *
 * Returns 0 on success, -1 on a usage or mapping error, and the system
 * call's return value if init_module fails.
 */
int main(int argc, char **argv)
{
	const char *filename;
	size_t image_size;
	void *image;
	int rc;

	if (argc < 2) {
		printf("usage: %s <module.ko>\n", (argc > 0 && argv[0]) ? argv[0] : "inm");
		return -1;
	}
	filename = argv[1];

	/* Largest image size accepted by try_to_mmap_module(). */
	image_size = INT_MAX - 4095;
	image = try_to_mmap_module(filename, &image_size);
	if (image == NULL) {
		printf("mmap error\n");
		return -1;
	}

	/*
	 * init_module loads the relocatable module image into kernel space and
	 * runs the module's init function. The C library headers included here
	 * declare no init_module() wrapper, so invoke the system call directly.
	 */
	printf("next init_module\n");
	rc = (int)syscall(__NR_init_module, image, image_size, "");
	if (rc) {
		/* Save errno before any other library call can overwrite it. */
		int saved_errno = errno;

		printf("init_module failed %d:%s\n", saved_errno, strerror(saved_errno));
	} else {
		printf("init module sucess\n");
	}

	/* Release the mapping on both the success and the failure path. */
	munmap(image, image_size);
	return rc;
}

Makefile

# Cross-compile the insmod clone for the ARM target.
CC=arm-none-linux-gnueabi-gcc

# The recipe must be a single tab-indented line: $@ is the target (inm), $< the first prerequisite (inm.c).
inm: inm.c
	$(CC) -g -o $@ $<

2. 实验结果如下:

root@OK6410:/work/hello# ../inm ./hello.ko

init module sucess

root@OK6410:/work/hello# lsmod

hello 2045 0 - Live 0xbf004000

二. linux内核的模块装载过程

为何只调用一个init_module就会把模块装载到内核呢?下面就分析一下它的过程

在kernel/module.c中

/*
 * init_module system call (abridged listing of kernel/module.c).
 *
 * @umod:  user-space address of the module image (the contents of the .ko)
 * @len:   size of that image in bytes
 * @uargs: user-space module parameter string
 *
 * Loads the image via load_module(), runs the module's constructors and its
 * init function, then frees the module's init region.
 * Returns 0 on success or a negative errno.
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
		unsigned long, len, const char __user *, uargs)
{
	struct module *mod;
	int ret = 0;

	/* Permission check: needs CAP_SYS_MODULE and module loading enabled. */
	if (!capable(CAP_SYS_MODULE) || modules_disabled)
		return -EPERM;

	/* Most of the work happens in load_module(). */
	mod = load_module(umod, len, uargs);
	/* In the kernel, load_module() reports failure as an ERR_PTR value; never dereference it. */
	if (IS_ERR(mod))
		return PTR_ERR(mod);

	/* Announce that a new module is coming. */
	blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod);

	set_section_ro_nx(mod->module_core, mod->core_text_size, mod->core_ro_size, mod->core_size);
	set_section_ro_nx(mod->module_init, mod->init_text_size, mod->init_ro_size, mod->init_size);

	do_mod_ctors(mod);

	/* Run the module's init function. */
	if (mod->init != NULL)
		ret = do_one_initcall(mod->init);

	if (ret < 0) {
		/* Init failed: mark the module as going, notify, and free it. */
		mod->state = MODULE_STATE_GOING;
		synchronize_sched();
		module_put(mod);
		blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod);
		free_module(mod);
		wake_up(&module_wq);
		return ret;
	}

	/* A positive return from the init function is not expected: dump a stack trace. */
	if (ret > 0)
		dump_stack();

	mod->state = MODULE_STATE_LIVE;
	wake_up(&module_wq);
	blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_LIVE, mod);

	async_synchronize_full();

	mutex_lock(&module_mutex);
	module_put(mod);
	trim_init_extable(mod);
#ifdef CONFIG_KALLSYMS
	/* Switch to the core copies of the symbol and string tables. */
	mod->num_symtab = mod->core_num_syms;
	mod->symtab = mod->core_symtab;
	mod->strtab = mod->core_strtab;
#endif
	/* The init region is no longer needed: free it and reset its bookkeeping. */
	unset_module_init_ro_nx(mod);
	module_free(mod, mod->module_init);
	mod->module_init = NULL;
	mod->init_size = 0;
	mod->init_ro_size = 0;
	mod->init_text_size = 0;
	mutex_unlock(&module_mutex);

	return 0;
}

注: SYSCALL_DEFINE3(init_module, void __user *, umod, unsigned long, len, const char __user
*, uargs)

其中SYSCALL_DEFINE3是定义在include/linux/syscalls.h中

#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)

#define SYSCALL_DEFINEx(x, sname, ...) __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

#define __SYSCALL_DEFINEx(x, name, ...) asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))

asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))

还有一个宏 __SC_DECL*也是定义在include/linux/syscalls.h中,

它的作用是: 把每一对"类型, 参数名"之间的逗号去掉,拼成正常的参数声明; 每一级宏处理一对参数后,把剩余参数交给下一级宏(__SC_DECL3 -> __SC_DECL2 -> __SC_DECL1),形式上类似递归.

/*
 * __SC_DECLn turns a flat "type, name, type, name, ..." list into a C
 * parameter list: each level emits "type name" for its first pair and hands
 * the remaining arguments to the next lower level.
 */
#define __SC_DECL1(t1, a1) t1 a1

#define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)

#define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__)

所以最终展开如下:

asmlinkage long sys_init_module(void __user* umod, unsigned long len, const char __user* uargs)

为什么是SYSCALL_DEFINE3? 后面的3是代表有3个参数.

2. 在load_module中完成大部分操作

init_module

--> load_module

/*
 * Load a module image from user space (abridged listing of kernel/module.c;
 * the error checks after most intermediate steps are left out).
 *
 * @umod:  user-space address of the module image
 * @len:   size of the image in bytes
 * @uargs: user-space module parameter string
 *
 * Returns the struct module in its final location, or an ERR_PTR() value.
 */
static struct module *load_module(void __user *umod, unsigned long len,
				  const char __user *uargs)
{
	struct load_info info = { NULL, };
	struct module *mod;
	long err;

	/* 1. Allocate a buffer the size of hello.ko, copy the file in and sanity-check it. */
	err = copy_and_check(&info, umod, len, uargs);
	if (err)
		return ERR_PTR(err);

	/* 2. Parse hello.ko and load the data of its SHF_ALLOC sections into new memory. */
	mod = layout_and_allocate(&info);
	if (IS_ERR(mod)) {
		err = PTR_ERR(mod);
		goto free_copy;
	}

	module_unload_init(mod);                 /* 3. */
	find_module_sections(mod, &info);        /* 4. */
	check_module_license_and_versions(mod);  /* 5. */
	setup_modinfo(mod, &info);               /* 6. */
	simplify_symbols(mod, &info);            /* 7. */
	apply_relocations(mod, &info);           /* 8. */
	post_relocation(mod, &info);             /* 9. */
	flush_module_icache(mod);                /* 10. */

	/* Copy the parameter string in from user space. */
	mod->args = strndup_user(uargs, ~0UL >> 1);

	mod->state = MODULE_STATE_COMING;

	mutex_lock(&module_mutex);
	/* A module with the same name is already loaded. */
	if (find_module(mod->name)) {
		err = -EEXIST;
		goto unlock;
	}

	if (!mod->taints || mod->taints == (1U<<TAINT_CRAP))
		dynamic_debug_setup(info.debug, info.num_debug);

	err = verify_export_symbols(mod);

	module_bug_finalize(info.hdr, info.sechdrs, mod);
	/* Add the module to the global list of modules. */
	list_add_rcu(&mod->list, &modules);
	mutex_unlock(&module_mutex);

	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);

	err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);

	/* The temporary copy of the file and the string bitmap are no longer needed. */
	kfree(info.strmap);
	free_copy(&info);

	trace_module_load(mod);
	return mod;

	/* Error unwind, following the kernel's load_module(): undo the steps above in reverse order. */
 unlock:
	mutex_unlock(&module_mutex);
	synchronize_sched();
	kfree(mod->args);
	module_arch_cleanup(mod);
	free_modinfo(mod);
	module_unload_free(mod);
	module_deallocate(mod, &info);
 free_copy:
	free_copy(&info);
	return ERR_PTR(err);
}

下面以hello.ko的加载为例分析一下:



0000000: 7f45 4c46 0101 0100 0000 0000 0000 0000 //e_ident

0000010: 0100 2800 0100 0000 0000 0000 0000 0000

type mach version entry phoff

0000020: 2cb7 0000 0000 0005 3400 0000 0000 2800

shoff flags ehsize phentsize phnum shentsize

0000030: 2400 2100

shnum shstrndx

e_shoff = 0xb72c = 46892 --> section header table在文件中的偏移是46892

e_shentsize = 0x28 = 40 --> section header table 每个entry是40Byte

e_shnum = 0x24 = 36 --> section header table中共有36个entry(即36个section header)

e_shstrndx = 0x21 = 33 --> 说明.shstrtab这个section在section_header_table中的第33项

2.1 第1步申请内存,并把ko数据从用户区copy到内核区

root@OK6410:/work/hello# ls -l hello.ko

-rw-rw-r-- 1 1000 1000 62173 Aug 6 13:58 hello.ko

init_module

--> load_module

--> copy_and_check

/*
 * Copy the whole module image from user space into a vmalloc'ed kernel
 * buffer and run basic sanity checks on its ELF header.
 *
 * @info:  load state; on success info->hdr and info->len describe the copy
 * @umod:  user-space address of the image
 * @len:   size of the image in bytes (62173 for hello.ko)
 * @uargs: unused in this listing
 *
 * Returns 0 on success, -ENOEXEC for a malformed image, -ENOMEM if the image
 * is larger than 64 MiB or the buffer cannot be allocated, and -EFAULT if the
 * copy from user space fails. The buffer is freed on every failure path.
 */
static int copy_and_check(struct load_info *info, const void __user *umod,
			  unsigned long len, const char __user *uargs)
{
	int err;
	Elf_Ehdr *hdr;

	if (len < sizeof(*hdr))
		return -ENOEXEC;

	/* Allocate a buffer as large as hello.ko itself. */
	if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
		return -ENOMEM;

	/* Copy the entire hello.ko file from user space to kernel space. */
	if (copy_from_user(hdr, umod, len) != 0) {
		err = -EFAULT;
		goto free_hdr;
	}

	/*
	 * - e_ident must start with the four magic bytes '\177ELF';
	 * - e_type must be ET_REL (1): a relocatable file;
	 * - elf_check_arch() validates e_machine (0x28 means ARM);
	 * - e_shentsize must equal sizeof(Elf_Shdr).
	 */
	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
	    || hdr->e_type != ET_REL
	    || !elf_check_arch(hdr)
	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/*
	 * The section header table starts at e_shoff and occupies
	 * e_shnum * sizeof(Elf_Shdr) bytes; it must lie inside the image.
	 */
	if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/* Remember the copy in info. */
	info->hdr = hdr;
	info->len = len;
	return 0;

free_hdr:
	vfree(hdr);
	return err;
}

2.2 第2步解析模块的布局,为core与init两部分申请最终内存,并把需要加载的section搬到最终内存

init_module

--> load_module

--> layout_and_allocate

static struct module *layout_and_allocate(struct load_info *info)

{

/* Module within temporary copy. */

struct module *mod;

Elf_Shdr *pcpusec;

mod = setup_load_info(info); //1.更新每一个section的实际地址,并返回gnu.linkonce.this_module这个section的地址

check_modinfo(mod, info); //2.检查mofinfo这个section中的内容

//这个函数是空的

module_frob_arch_sections(info->hdr, info->sechdrs, info->secstrings, mod);

//没有pcpu这个section, pcpupcpusec->sh_size=0

pcpusec = &info->sechdrs[info->index.pcpu];

if (pcpusec->sh_size) {

/* We have a special allocation for this section. */

err = percpu_modalloc(mod, pcpusec->sh_size, pcpusec->sh_addralign);

if (err)

goto out;

pcpusec->sh_flags &= ~(unsigned
long)SHF_ALLOC;

}

//3.这儿的len就是hello.ko的size=62173

layout_sections(mod, info);

//为符号名称字符串表分配内存

info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size)* sizeof(long), GFP_KERNEL);

//4. 如果配置了选项CONFIG_KALLSYMS,就要把符号名称的字符串表加载到内存

layout_symtab(mod, info);

//5. 把hello.ko中需要加载到内存的加载到内存

err = move_module(mod, info);

//.gnu.linkonce.this_module的section的地址也要相应改变

mod = (void *)info->sechdrs[info->index.mod].sh_addr;

kmemleak_load_module(mod, info);

return mod;

}

2.2.1 HDR的第一次修改

init_module

--> load_module

--> layout_and_allocate

--> setup_load_info

更新每一个section的实际地址,并返回gnu.linkonce.this_module这个section的地址

/*
 * Fill in the convenience fields of @info (section header table, section
 * name strings, symbol/string table indices, ...) for the temporary copy of
 * the module, and locate its struct module.
 *
 * Returns the address of the .gnu.linkonce.this_module section inside the
 * temporary copy, or an ERR_PTR() value if the image is not a usable module.
 */
static struct module *setup_load_info(struct load_info *info)
{
	unsigned int i;
	int err;
	struct module *mod;

	/* Base address of the section header table. */
	info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;

	/* Address of the .shstrtab section (the section names). */
	info->secstrings = (void *)info->hdr + info->sechdrs[info->hdr->e_shstrndx].sh_offset;

	/* Update every sh_addr so that it points into the temporary copy. */
	err = rewrite_section_headers(info);
	if (err)
		return ERR_PTR(err);

	/* Find the symbol table and, through its sh_link, the symbol-name string table. */
	for (i = 1; i < info->hdr->e_shnum; i++) {
		if (info->sechdrs[i].sh_type == SHT_SYMTAB) {
			info->index.sym = i;
			info->index.str = info->sechdrs[i].sh_link;
			info->strtab = (char *)info->hdr + info->sechdrs[info->index.str].sh_offset;
			break;
		}
	}

	/*
	 * Look up the index of .gnu.linkonce.this_module in the section header
	 * table. find_sec() returns 0 when there is no such section, in which
	 * case the file is not a module.
	 */
	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
	if (!info->index.mod)
		return ERR_PTR(-ENOEXEC);

	/* Address of the .gnu.linkonce.this_module section. */
	mod = (void *)info->sechdrs[info->index.mod].sh_addr;

	/* A stripped hello.ko has no symbol table; loading it fails here. */
	if (info->index.sym == 0)
		return ERR_PTR(-ENOEXEC);

	info->index.pcpu = find_pcpusec(info);

	if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
		return ERR_PTR(-ENOEXEC);

	return mod;
}

init_module

--> load_module

--> layout_and_allocate

--> setup_load_info

--> rewrite_section_headers

因为hello.ko已经被读取到了内核空间的某一个内存处,要想访问每个section的地址需要通过sh_addr

但是现在sh_addr这个值已经不准了,所以需要用新地址来更新一下

/*
 * Rewrite the section headers of the temporary in-kernel copy of the module:
 * every sh_addr is set to the section's address inside that copy, and the
 * SHF_ALLOC flag is cleared on sections that are not to be loaded.
 *
 * Returns 0 on success, or -ENOEXEC if a section (other than SHT_NOBITS)
 * extends past the end of the image.
 */
static int rewrite_section_headers(struct load_info *info)

{

unsigned int i;

info->sechdrs[0].sh_addr = 0;

for (i = 1; i < info->hdr->e_shnum; i++) {

Elf_Shdr *shdr = &info->sechdrs[i];

if (shdr->sh_type != SHT_NOBITS && info->len < shdr->sh_offset + shdr->sh_size)

return -ENOEXEC;

//hello.ko now lives at some address in kernel space, so sh_addr has to be updated:

//store the section's address inside the copy (copy base + file offset) in its header

shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset;

#ifndef CONFIG_MODULE_UNLOAD //compiled only when CONFIG_MODULE_UNLOAD is NOT set, i.e. modules cannot be unloaded

//a module that can never be unloaded does not need its .exit sections loaded

if (strstarts(info->secstrings+shdr->sh_name, ".exit"))

shdr->sh_flags &= ~(unsigned
long)SHF_ALLOC;

#endif

}

info->index.vers = find_sec(info, "__versions"); //index of the __versions section (0 if find_sec finds none)

info->index.info = find_sec(info, ".modinfo"); //index of the .modinfo section (0 if find_sec finds none)

info->sechdrs[info->index.info].sh_flags &= ~(unsigned
long)SHF_ALLOC; //clear SHF_ALLOC on .modinfo and __versions:

info->sechdrs[info->index.vers].sh_flags &= ~(unsigned
long)SHF_ALLOC; //sections without SHF_ALLOC are not copied into the final module memory

return 0;

}

init_module

--> load_module

--> layout_and_allocate

--> setup_load_info

--> find_sec

//遍历整个section_header_table找到name与参数相同的section,返回这个section在section_header_table中的索引

static unsigned int find_sec(const struct
load_info *info, const char *name)

{

//info->secstrings是.shstrtab的地址,
shdr->sh_name是每一个section的名字在.shstrtab中的偏移

for (i = 1; i < info->hdr->e_shnum; i++) {

Elf_Shdr *shdr = &info->sechdrs[i];

if ((shdr->sh_flags & SHF_ALLOC) && strcmp(info->secstrings + shdr->sh_name, name) == 0)

return i;

}

return 0;

}

2.2.2 检查section中的版本号

init_module

--> load_module

--> layout_and_allocate

--> check_modinfo

检查modinfo这个section中的内容

/*
 * Check the contents of the .modinfo section: the "vermagic" string must
 * match the running kernel, modules from staging taint the kernel, and the
 * module's license string is passed to set_license().
 *
 * Returns 0 on success, -ENOEXEC on a vermagic mismatch, or the error from
 * try_to_force_load() when the module carries no vermagic at all.
 */
static int check_modinfo(struct module *mod, struct load_info *info)
{
	const char *modmagic = get_modinfo(info, "vermagic");
	int err;

	/*
	 * The kernel version the module was built for must match the running
	 * kernel (hello.ko carries vermagic=3.0.1).
	 */
	if (!modmagic) {
		err = try_to_force_load(mod, "bad vermagic");
		if (err)
			return err;
	} else if (!same_magic(modmagic, vermagic, info->index.vers)) {
		printk(KERN_ERR "%s: version magic '%s' should be '%s'\n",
		       mod->name, modmagic, vermagic);
		return -ENOEXEC;
	}

	if (get_modinfo(info, "staging")) {
		add_taint_module(mod, TAINT_CRAP);
		printk(KERN_WARNING "%s: module is from the staging directory,"
		       " the quality is unknown, you have been warned.\n",
		       mod->name);
	}

	/* Hand the license string to set_license() (the article: checks whether it is GPL). */
	set_license(mod, get_modinfo(info, "license"));

	return 0;
}

init_module

--> load_module

--> layout_and_allocate

--> check_modinfo

--> get_modinfo

/*
 * Look up "tag=value" in the .modinfo section.
 *
 * @info: load state; info->index.info must already hold the index of the
 *        .modinfo section (rewrite_section_headers() sets it with
 *        find_sec(info, ".modinfo")).
 * @tag:  key to search for, without the '='
 *
 * Returns a pointer to the first byte after '=', or NULL if the tag is absent.
 */
static char *get_modinfo(struct load_info *info, const char *tag)
{
	char *p;
	unsigned int taglen = strlen(tag);
	/* Section header of .modinfo. */
	Elf_Shdr *infosec = &info->sechdrs[info->index.info];
	/* Length of the .modinfo section. */
	unsigned long size = infosec->sh_size;

	/* Every entry in .modinfo is terminated by '\0'. */
	for (p = (char *)infosec->sh_addr; p; p = next_string(p, &size)) {
		if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=')
			return p + taglen + 1;
	}

	return NULL;
}

下面是用objdump打印的modinfo



从上图可以看出每一个字段的结束都是 00

2.2.3 划分为两部分CORE 与 INIT

init_module

--> load_module

--> layout_and_allocate

--> layout_sections

/*
 * Split the SHF_ALLOC sections of the module into a CORE part and an INIT
 * part. Each selected section gets the value returned by get_offset() stored
 * in its sh_entsize field (INIT sections additionally have INIT_OFFSET_MASK
 * set), while mod->core_size / mod->init_size and the text/ro size markers
 * are updated along the way.
 * NOTE(review): get_offset() is not shown here; presumably it returns the
 * section's offset inside the region and advances the size - confirm.
 */
static void layout_sections(struct module *mod, struct
load_info *info)

{

/* Each row is { flags that must all be set, flags that must all be clear }; one pass per row. */
static unsigned long const masks[][2] = {

{ SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL },

{ SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL },

{ SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL },

{ ARCH_SHF_SMALL | SHF_ALLOC, 0 }

};

unsigned int m, i;

/* ~0UL in sh_entsize marks a section that has not been placed yet. */
for (i = 0; i < info->hdr->e_shnum; i++)

info->sechdrs[i].sh_entsize = ~0UL;

//the sections are split into two parts: CORE and INIT

//a. part 1, CORE: look for sections whose flags contain SHF_ALLOC

for (m = 0; m < ARRAY_SIZE(masks); ++m) {

for (i = 0; i < info->hdr->e_shnum; ++i) {

Elf_Shdr *s = &info->sechdrs[i];

const char *sname = info->secstrings + s->sh_name;

//sections with SHF_ALLOC have to be loaded into the final memory

//sections with SHF_ALLOC whose name does not start with ".init" go into the CORE part

if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1])

|| s->sh_entsize != ~0UL || strstarts(sname, ".init"))

continue;

s->sh_entsize = get_offset(mod, &mod->core_size, s, i); //record get_offset()'s result for this section in sh_entsize

}

switch (m) {

case 0: //after the executable sections: everything so far is text

mod->core_size = debug_align(mod->core_size);

mod->core_text_size = mod->core_size;

break;

case 1: //after the read-only sections

mod->core_size = debug_align(mod->core_size);

mod->core_ro_size = mod->core_size;

break;

case 3: //after all sections

mod->core_size = debug_align(mod->core_size);

break;

}

}

//b. part 2, INIT

for (m = 0; m < ARRAY_SIZE(masks); ++m) {

for (i = 0; i < info->hdr->e_shnum; ++i) {

Elf_Shdr *s = &info->sechdrs[i];

const char *sname = info->secstrings + s->sh_name;

//sections with SHF_ALLOC have to be loaded into the final memory

//sections with SHF_ALLOC whose name starts with ".init" go into the INIT part

if ((s->sh_flags & masks[m][0]) != masks[m][0]

|| (s->sh_flags & masks[m][1])

|| s->sh_entsize != ~0UL

|| !strstarts(sname, ".init"))

continue;

/* INIT_OFFSET_MASK tags the stored value as belonging to the init region. */
s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) | INIT_OFFSET_MASK);

}

switch (m) {

case 0:

mod->init_size = debug_align(mod->init_size);

mod->init_text_size = mod->init_size;

break;

case 1:

mod->init_size = debug_align(mod->init_size);

mod->init_ro_size = mod->init_size;

break;

case 3:

mod->init_size = debug_align(mod->init_size);

break;

}

}

}

注: 这儿为什么要区分init与core呢? 因为init部分的内存在使用完之后,马上就会被释放,而core部分的内存则会一直存在于内存中

2.2.4 加载符号名称字符串表

当配置了内核选项CONFIG_KALLSYMS时,就需要把符号名称字符串表加载到内存中去

因为这个section的标志位中不含 SHF_ALLOC,所以需要单独加载

/*
 * Reserve room for the symbol table and its string table.
 *
 * Both sections get SHF_ALLOC set and are placed in the init part of the
 * module (sh_entsize tagged with INIT_OFFSET_MASK). In addition, space is
 * appended to the core part for the core symbols (info->symoffs) and for the
 * bytes of their names (info->stroffs); info->strmap records which bytes of
 * the string table are needed.
 */
static void layout_symtab(struct module *mod, struct
load_info *info)

{

Elf_Shdr *symsect = info->sechdrs + info->index.sym;

Elf_Shdr *strsect = info->sechdrs + info->index.str;

const Elf_Sym *src;

unsigned int i, nsrc, ndst;

/* Put symbol section at end of init part of module. */

symsect->sh_flags |= SHF_ALLOC;

symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,

info->index.sym) | INIT_OFFSET_MASK;

DEBUGP("\t%s\n", info->secstrings + symsect->sh_name);

src = (void *)info->hdr + symsect->sh_offset;

nsrc = symsect->sh_size / sizeof(*src);

/*
 * Count the symbols accepted by is_core_symbol() in ndst (which starts at
 * 1) and, for each of them, set one strmap bit per byte of its name; the
 * inner loop stops at the name's terminating NUL or at a bit that is
 * already set.
 */
for (ndst = i = 1; i < nsrc; ++i, ++src)

if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {

unsigned int j = src->st_name;

while (!__test_and_set_bit(j, info->strmap)

&& info->strtab[j])

++j;

++ndst;

}

/* Append room for core symbols at end of
core part. */

info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);

mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);

/* Put string table section at end of
init part of module. */

strsect->sh_flags |= SHF_ALLOC;

strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,

info->index.str) | INIT_OFFSET_MASK;

DEBUGP("\t%s\n", info->secstrings + strsect->sh_name);

/* Append room for core symbols' strings
at end of core part. */

info->stroffs = mod->core_size;

/* Byte 0 of the string table is always kept. */
__set_bit(0, info->strmap);

/* One byte of core space per bit set in strmap. */
mod->core_size += bitmap_weight(info->strmap, strsect->sh_size);

}

2.2.5 HDR视图的第二次转移

为core与init区分配新的内存,并把core与init部分copy到新内存中

static int move_module(struct module *mod, struct
load_info *info)

{

int i;

void *ptr;

//对core区分配内存

ptr = module_alloc_update_bounds(mod->core_size);

kmemleak_not_leak(ptr); //检查内存泄漏

memset(ptr, 0, mod->core_size); //将对core区内存清0

mod->module_core = ptr; //将core内存指针记录在变量module_core中

//对init区分配内存

ptr = module_alloc_update_bounds(mod->init_size);

kmemleak_ignore(ptr); //检查内存泄漏

memset(ptr, 0, mod->init_size); //将对init区内存清0

mod->module_init = ptr;
//将init内存指针记录在变量module_init中

//将core与init区copy到新分配的内存中去

for (i = 0; i < info->hdr->e_shnum; i++) {

void *dest;

Elf_Shdr *shdr = &info->sechdrs[i];

if (!(shdr->sh_flags & SHF_ALLOC))

continue;

if (shdr->sh_entsize & INIT_OFFSET_MASK)

dest = mod->module_init + (shdr->sh_entsize & ~INIT_OFFSET_MASK);

else

dest = mod->module_core + shdr->sh_entsize;

if (shdr->sh_type != SHT_NOBITS)

memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);

//sectin再次移动,那么section_header_table中指向section的指针也要相应改变

shdr->sh_addr = (unsigned
long)dest;

}

return 0;

}

附:

1. 关于c中的转义

类似于 \t \b

#include <stdio.h>

#include <stdlib.h>

/* Print the numeric value of the octal character escape '\177' in hex. */
int main ( int argc, char *argv[] )
{
	const int octal_escape = '\177';

	printf("0x%x\n", octal_escape);
	return EXIT_SUCCESS;
}

gcc -o test test.c

结果是: '\177'
= 0x7f

\OOO : 8进制

\xXX : 16进制

2. 关于gcc中的条件表达式

表达式a?b:c, 省略了中间的b,是什么意思呢?

#include <stdio.h>

#include <stdlib.h>

/* Demonstrate the GNU C conditional with an omitted middle operand. */
int main ( int argc, char *argv[] )
{
	int a = 2;
	/* "a ?: 1" yields a when a is non-zero, otherwise 1. */
	int result = a ?: 1;

	printf("result=%d\n", result);
	return 0;
}

这是gcc的一个扩展: a?:c == a?a:c

参考文章:
http://gcc.gnu.org/onlinedocs/gcc/Conditionals.html#Conditionals
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: