Linux内核kprobe机制实现浅析
2016-02-01 00:00
706 查看
Kprobe机制是内核提供的一种调试机制,它提供了一种方法,能够在不修改现有代码的基础上,灵活的跟踪内核函数的执行。它的基本工作原理是:用户指定一个探测点,并把一个用户定义的处理函数关联到该探测点,当内核执行到该探测点时,相应的关联函数被执行,然后继续执行正常的代码路径。 Kprobe提供了三种形式的探测点,一种是最基本的kprobe,能够在指定代码执行前、执行后进行探测,但此时不能访问被探测函数内的相关变量信息;一种是jprobe,用于探测某一函数的入口,并且能够访问对应的函数参数;一种是kretprobe,用于完成指定函数返回值的探测功能。其中最基本的就是kprobe机制,jprobe以及kretprobe的实现都依赖于kprobe,但其代码的实现都很巧妙,强烈建议每一个内核爱好者阅读。 好了,闲话少叙,开始上代码: 首先是struct kprobe结构,每一个探测点的基本结构。 点击(此处)折叠或打开
struct kprobe {
/*用于保存kprobe的全局hash表,以被探测的addr为key*/
struct hlist_node hlist;
/* list of kprobes for multi-handler support */
/*当对同一个探测点存在多个探测函数时,所有的函数挂在这条链上*/
struct list_head list;
/*count the number of times this probe was temporarily disarmed */
unsigned long nmissed;
/* location of the probe point */
/*被探测的目标地址*/
kprobe_opcode_t *addr;
/* Allow user to indicate symbol name of the probe point */
/*symblo_name的存在,允许用户指定函数名而非确定的地址*/
const char *symbol_name;
/* Offset into the symbol */
/*如果被探测点为函数内部某个指令,需要使用addr + offset的方式*/
unsigned int offset;
/* Called before addr is executed. */
/*探测函数,在目标探测点执行之前调用*/
kprobe_pre_handler_t pre_handler;
/* Called after addr is executed, unless... */
/*探测函数,在目标探测点执行之后调用*/
kprobe_post_handler_t post_handler;
/*
* ... called if executing addr causes a fault (eg. page fault).
* Return 1 if it handled fault, otherwise kernel will see it.
*/
kprobe_fault_handler_t fault_handler;
/*
* ... called if breakpoint trap occurs in probe handler.
* Return 1 if it handled break, otherwise kernel will see it.
*/
kprobe_break_handler_t break_handler;
/*opcode 以及 ainsn 用于保存被替换的指令码*/
/* Saved opcode (which has been replaced with breakpoint) */
kprobe_opcode_t opcode;
/* copy of the original instruction */
struct arch_specific_insn ainsn;
/*
* Indicates various status flags.
* Protected by kprobe_mutex after this kprobe is registered.
*/
u32 flags;
};
对于kprobe功能的实现主要利用了内核中的两个功能特性:异常(尤其是int 3),单步执行(EFLAGS中的TF标志)。 大概的流程: 1)在注册探测点的时候,对被探测函数的指令码进行替换,替换为int 3的指令码; 2)在执行int 3的异常执行中,通过通知链的方式调用kprobe的异常处理函数; 3)在kprobe的异常出来函数中,判断是否存在pre_handler钩子,存在则执行; 4)执行完后,准备进入单步调试,通过设置EFLAGS中的TF标志位,并且把异常返回的地址修改为保存的原指令码; 5)代码返回,执行原有指令,执行结束后触发单步异常; 6)在单步异常的处理中,清除单步标志,执行post_handler流程,并最终返回;
下面又进入代码时间,首先看一下kprobe模块的初始化代码,初始化代码主要做了两件事:标记出哪些代码是不能被探测的,这些代码属于kprobe实现的关键代码;注册通知链到die_notifier,用于接收异常通知。 点击(此处)折叠或打开
初始化代码位于kernel/kprobes.c中
static int __init init_kprobes(void)
{
int i, err = 0;
....
/*kprobe_blacklist中保存的是kprobe实现的关键代码路径,这些函数不应该被kprobe探测*/
/*
* Lookup and populate the kprobe_blacklist.
*
* Unlike the kretprobe blacklist, we'll need to determine
* the range of addresses that belong to the said functions,
* since a kprobe need not necessarily be at the beginning
* of a function.
*/
for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
kprobe_lookup_name(kb->name, addr);
if (!addr)
continue;
kb->start_addr = (unsigned long)addr;
symbol_name = kallsyms_lookup(kb->start_addr,
&size, &offset, &modname, namebuf);
if (!symbol_name)
kb->range = 0;
else
kb->range = size;
}
....
if (!err)
/*注册通知链到die_notifier,用于接收int 3的异常信息*/
err = register_die_notifier(&kprobe_exceptions_nb);
....
}
其中的通知链:
static struct notifier_block kprobe_exceptions_nb = {
.notifier_call = kprobe_exceptions_notify,
/*优先级最高,保证最先执行*/
.priority = 0x7fffffff /* we need to be notified first */
};
kprobe的注册流程register_kprobe。
点击(此处)折叠或打开
int __kprobes register_kprobe(struct kprobe *p)
{
int ret = 0;
struct kprobe *old_p;
struct module *probed_mod;
kprobe_opcode_t *addr;
/*获取被探测点的地址,指定了symbol_name,则从kallsyms中获取;指定了offset,则返回addr + offset*/
addr = kprobe_addr(p);
if (!addr)
return -EINVAL;
p->addr = addr;
/*判断同一个kprobe是否被重复注册*/
ret = check_kprobe_rereg(p);
if (ret)
return ret;
jump_label_lock();
preempt_disable();
/*判断被注册的函数是否位于内核的代码段内,或位于不能探测的kprobe实现路径中*/
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr) ||
ftrace_text_reserved(p->addr, p->addr) ||
jump_label_text_reserved(p->addr, p->addr))
goto fail_with_jump_label;
/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
p->flags &= KPROBE_FLAG_DISABLED;
/*
* Check if are we probing a module.
*/
/*判断被探测的地址是否属于某一个模块,并且位于模块的text section内*/
probed_mod = __module_text_address((unsigned long) p->addr);
if (probed_mod) {
/*如果被探测的为模块地址,首先要增加模块的引用计数*/
/*
* We must hold a refcount of the probed module while updating
* its code to prohibit unexpected unloading.
*/
if (unlikely(!try_module_get(probed_mod)))
goto fail_with_jump_label;
/*
* If the module freed .init.text, we couldn't insert
* kprobes in there.
*/
/*如果被探测的地址位于模块的init地址段内,但该段代码区间已被释放,则直接退出*/
if (within_module_init((unsigned long)p->addr, probed_mod) &&
probed_mod->state != MODULE_STATE_COMING) {
module_put(probed_mod);
goto fail_with_jump_label;
}
}
preempt_enable();
jump_label_unlock();
p->nmissed = 0;
INIT_LIST_HEAD(&p->list);
mutex_lock(&kprobe_mutex);
jump_label_lock(); /* needed to call jump_label_text_reserved() */
get_online_cpus(); /* For avoiding text_mutex deadlock. */
mutex_lock(&text_mutex);
/*判断在同一个探测点是否已经注册了其他的探测函数*/
old_p = get_kprobe(p->addr);
if (old_p) {
/* Since this may unoptimize old_p, locking text_mutex. */
/*如果已经存在注册过的kprobe,则将探测点的函数修改为aggr_pre_handler,并将所有的handler挂载到其链表上,由其负责所有handler函数的执行*/
ret = register_aggr_kprobe(old_p, p);
goto out;
}
/* 分配特定的内存地址用于保存原有的指令
* 按照内核注释,被分配的地址必须must be on special executable page on x86.
* 该地址被保存在kprobe->ainsn.insn
*/
ret = arch_prepare_kprobe(p);
if (ret)
goto out;
/*将kprobe加入到相应的hash表内*/
INIT_HLIST_NODE(&p->hlist);
hlist_add_head_rcu(&p->hlist,
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
if (!kprobes_all_disarmed && !kprobe_disabled(p))
/*将探测点的指令码修改为int 3指令*/
__arm_kprobe(p);
/* Try to optimize kprobe */
try_to_optimize_kprobe(p);
out:
mutex_unlock(&text_mutex);
put_online_cpus();
jump_label_unlock();
mutex_unlock(&kprobe_mutex);
if (probed_mod)
module_put(probed_mod);
return ret;
fail_with_jump_label:
preempt_enable();
jump_label_unlock();
return -EINVAL;
注册完毕,就开始kprobe的执行流程了。对于该探测点,由于其起始指令已经被修改为int3,因此在执行到该地址时,必然会触发3号中断向量的处理流程do_int3.
点击(此处)折叠或打开
/* May run on IST stack. */
dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
#ifdef CONFIG_KPROBES
/*在这里以DIE_INT3,通知kprobe注册的通知链*/
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
#else
if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
#endif
preempt_conditional_sti(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
preempt_conditional_cli(regs);
}
在do_int3中触发kprobe注册的通知链函数,kprobe_exceptions_notify。由于kprobe以及jprobe等机制的处理核心都在此函数内,这里只针对kprobe的流程进行分析:进入函数的原因是DIE_INT3,并且是第一次进入该函数。
点击(此处)折叠或打开
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct die_args *args = data;
int ret = NOTIFY_DONE;
if (args->regs && user_mode_vm(args->regs))
return ret;
switch (val) {
case DIE_INT3:
/*对于kprobe,进入kprobe_handle*/
if (kprobe_handler(args->regs))
ret = NOTIFY_STOP;
break;
case DIE_DEBUG:
if (post_kprobe_handler(args->regs)) {
/*
* Reset the BS bit in dr6 (pointed by args->err) to
* denote completion of processing
*/
(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
ret = NOTIFY_STOP;
}
break;
case DIE_GPF:
/*
* To be potentially processing a kprobe fault and to
* trust the result from kprobe_running(), we have
* be non-preemptible.
*/
if (!preemptible() && kprobe_running() &&
kprobe_fault_handler(args->regs, args->trapnr))
ret = NOTIFY_STOP;
break;
default:
break;
}
return ret;
}
点击(此处)折叠或打开
static int __kprobes kprobe_handler(struct pt_regs *regs)
{
kprobe_opcode_t *addr;
struct kprobe *p;
struct kprobe_ctlblk *kcb;
/*对于int 3中断,其被Intel定义为Trap,那么异常发生时EIP寄存器内指向的为异常指令的后一条指令*/
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
/*
* We don't want to be preempted for the entire
* duration of kprobe processing. We conditionally
* re-enable preemption at the end of this function,
* and also in reenter_kprobe() and setup_singlestep().
*/
preempt_disable();
kcb = get_kprobe_ctlblk();
/*获取addr对应的kprobe*/
p = get_kprobe(addr);
if (p) {
/*如果异常的进入是由kprobe导致,则进入reenter_kprobe(jprobe需要,到时候分析)*/
if (kprobe_running()) {
if (reenter_kprobe(p, regs, kcb))
return 1;
} else {
set_current_kprobe(p, regs, kcb);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
/*
* If we have no pre-handler or it returned 0, we
* continue with normal processing. If we have a
* pre-handler and it returned non-zero, it prepped
* for calling the break_handler below on re-entry
* for jprobe processing, so get out doing nothing
* more here.
*/
/*执行在此地址上挂载的pre_handle函数*/
if (!p->pre_handler || !p->pre_handler(p, regs))
/*设置单步调试模式,为post_handle函数的执行做准备*/
setup_singlestep(p, regs, kcb, 0);
return 1;
}
} else if (*addr != BREAKPOINT_INSTRUCTION) {
/*
* The breakpoint instruction was removed right
* after we hit it. Another cpu has removed
* either a probepoint or a debugger breakpoint
* at this address. In either case, no further
* handling of this interrupt is appropriate.
* Back up over the (now missing) int3 and run
* the original instruction.
*/
regs->ip = (unsigned long)addr;
preempt_enable_no_resched();
return 1;
} else if (kprobe_running()) {
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
setup_singlestep(p, regs, kcb, 0);
return 1;
}
} /* else: not a kprobe fault; let the kernel handle it */
preempt_enable_no_resched();
return 0;
}
点击(此处)折叠或打开
static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb, int reenter)
{
if (setup_detour_execution(p, regs, reenter))
return;
#if !defined(CONFIG_PREEMPT)
if (p->ainsn.boostable == 1 && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
if (!reenter)
reset_current_kprobe();
/*
* Reentering boosted probe doesn't reset current_kprobe,
* nor set current_kprobe, because it doesn't use single
* stepping.
*/
regs->ip = (unsigned long)p->ainsn.insn;
preempt_enable_no_resched();
return;
}
#endif
/*jprobe*/
if (reenter) {
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
kcb->kprobe_status = KPROBE_REENTER;
} else
kcb->kprobe_status = KPROBE_HIT_SS;
/* Prepare real single stepping */
/*准备单步模式,设置EFLAGS的TF标志位,清楚IF标志位(禁止中断)*/
clear_btf();
regs->flags |= X86_EFLAGS_TF;
regs->flags &= ~X86_EFLAGS_IF;
/* single step inline if the instruction is an int3 */
if (p->opcode == BREAKPOINT_INSTRUCTION)
regs->ip = (unsigned long)p->addr;
else
/*设置异常返回的指令为保存的被探测点的指令*/
regs->ip = (unsigned long)p->ainsn.insn;
}
对应kprobe,pre_handle的执行就结束了,按照代码,程序开始执行保存的被探测点的指令,由于开启了单步调试模式,执行完指令后会继续触发异常,这次的是do_debug异常处理流程。
点击(此处)折叠或打开
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
....
/*在do_debug中,以DIE_DEBUG再一次触发kprobe的通知链*/
if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
SIGTRAP) == NOTIFY_STOP)
return;
....
return;
}
点击(此处)折叠或打开
/*对于kprobe_exceptions_notify,其DIE_DEBUG处理流程*/
case DIE_DEBUG:
if (post_kprobe_handler(args->regs)) {
/*
* Reset the BS bit in dr6 (pointed by args->err) to
* denote completion of processing
*/
(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
ret = NOTIFY_STOP;
}
break;
static int __kprobes post_kprobe_handler(struct pt_regs *regs)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
if (!cur)
return 0;
/*设置异常返回的EIP为下一条需要执行的指令*/
resume_execution(cur, regs, kcb);
/*恢复异常执行前的EFLAGS*/
regs->flags |= kcb->kprobe_saved_flags;
/*执行post_handler函数*/
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
cur->post_handler(cur, regs, 0);
}
/* Restore back the original saved kprobes variables and continue. */
if (kcb->kprobe_status == KPROBE_REENTER) {
restore_previous_kprobe(kcb);
goto out;
}
reset_current_kprobe();
out:
preempt_enable_no_resched();
/*
* if somebody else is singlestepping across a probe point, flags
* will have TF set, in which case, continue the remaining processing
* of do_debug, as if this is not a probe hit.
*/
if (regs->flags & X86_EFLAGS_TF)
return 0;
return 1;
}
至此,一个典型的kprobe的流程已经执行完毕了。
jprobe、kretprobe to be continued...
参考链接: 1)http://www.ibm.com/developerworks/cn/linux/l-cn-systemtap1/index.html 2)http://blog.chinaunix.net/uid-23769728-id-3198044.html 3)http://zfpillar.devebar.net/2008/11/821.html 4)2.6.38 linux kernel(RTFC)
struct kprobe {
/*用于保存kprobe的全局hash表,以被探测的addr为key*/
struct hlist_node hlist;
/* list of kprobes for multi-handler support */
/*当对同一个探测点存在多个探测函数时,所有的函数挂在这条链上*/
struct list_head list;
/*count the number of times this probe was temporarily disarmed */
unsigned long nmissed;
/* location of the probe point */
/*被探测的目标地址*/
kprobe_opcode_t *addr;
/* Allow user to indicate symbol name of the probe point */
/*symblo_name的存在,允许用户指定函数名而非确定的地址*/
const char *symbol_name;
/* Offset into the symbol */
/*如果被探测点为函数内部某个指令,需要使用addr + offset的方式*/
unsigned int offset;
/* Called before addr is executed. */
/*探测函数,在目标探测点执行之前调用*/
kprobe_pre_handler_t pre_handler;
/* Called after addr is executed, unless... */
/*探测函数,在目标探测点执行之后调用*/
kprobe_post_handler_t post_handler;
/*
* ... called if executing addr causes a fault (eg. page fault).
* Return 1 if it handled fault, otherwise kernel will see it.
*/
kprobe_fault_handler_t fault_handler;
/*
* ... called if breakpoint trap occurs in probe handler.
* Return 1 if it handled break, otherwise kernel will see it.
*/
kprobe_break_handler_t break_handler;
/*opcode 以及 ainsn 用于保存被替换的指令码*/
/* Saved opcode (which has been replaced with breakpoint) */
kprobe_opcode_t opcode;
/* copy of the original instruction */
struct arch_specific_insn ainsn;
/*
* Indicates various status flags.
* Protected by kprobe_mutex after this kprobe is registered.
*/
u32 flags;
};
对于kprobe功能的实现主要利用了内核中的两个功能特性:异常(尤其是int 3),单步执行(EFLAGS中的TF标志)。 大概的流程: 1)在注册探测点的时候,对被探测函数的指令码进行替换,替换为int 3的指令码; 2)在执行int 3的异常执行中,通过通知链的方式调用kprobe的异常处理函数; 3)在kprobe的异常出来函数中,判断是否存在pre_handler钩子,存在则执行; 4)执行完后,准备进入单步调试,通过设置EFLAGS中的TF标志位,并且把异常返回的地址修改为保存的原指令码; 5)代码返回,执行原有指令,执行结束后触发单步异常; 6)在单步异常的处理中,清除单步标志,执行post_handler流程,并最终返回;
下面又进入代码时间,首先看一下kprobe模块的初始化代码,初始化代码主要做了两件事:标记出哪些代码是不能被探测的,这些代码属于kprobe实现的关键代码;注册通知链到die_notifier,用于接收异常通知。 点击(此处)折叠或打开
初始化代码位于kernel/kprobes.c中
static int __init init_kprobes(void)
{
int i, err = 0;
....
/*kprobe_blacklist中保存的是kprobe实现的关键代码路径,这些函数不应该被kprobe探测*/
/*
* Lookup and populate the kprobe_blacklist.
*
* Unlike the kretprobe blacklist, we'll need to determine
* the range of addresses that belong to the said functions,
* since a kprobe need not necessarily be at the beginning
* of a function.
*/
for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
kprobe_lookup_name(kb->name, addr);
if (!addr)
continue;
kb->start_addr = (unsigned long)addr;
symbol_name = kallsyms_lookup(kb->start_addr,
&size, &offset, &modname, namebuf);
if (!symbol_name)
kb->range = 0;
else
kb->range = size;
}
....
if (!err)
/*注册通知链到die_notifier,用于接收int 3的异常信息*/
err = register_die_notifier(&kprobe_exceptions_nb);
....
}
其中的通知链:
static struct notifier_block kprobe_exceptions_nb = {
.notifier_call = kprobe_exceptions_notify,
/*优先级最高,保证最先执行*/
.priority = 0x7fffffff /* we need to be notified first */
};
kprobe的注册流程register_kprobe。
点击(此处)折叠或打开
int __kprobes register_kprobe(struct kprobe *p)
{
int ret = 0;
struct kprobe *old_p;
struct module *probed_mod;
kprobe_opcode_t *addr;
/*获取被探测点的地址,指定了symbol_name,则从kallsyms中获取;指定了offset,则返回addr + offset*/
addr = kprobe_addr(p);
if (!addr)
return -EINVAL;
p->addr = addr;
/*判断同一个kprobe是否被重复注册*/
ret = check_kprobe_rereg(p);
if (ret)
return ret;
jump_label_lock();
preempt_disable();
/*判断被注册的函数是否位于内核的代码段内,或位于不能探测的kprobe实现路径中*/
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr) ||
ftrace_text_reserved(p->addr, p->addr) ||
jump_label_text_reserved(p->addr, p->addr))
goto fail_with_jump_label;
/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
p->flags &= KPROBE_FLAG_DISABLED;
/*
* Check if are we probing a module.
*/
/*判断被探测的地址是否属于某一个模块,并且位于模块的text section内*/
probed_mod = __module_text_address((unsigned long) p->addr);
if (probed_mod) {
/*如果被探测的为模块地址,首先要增加模块的引用计数*/
/*
* We must hold a refcount of the probed module while updating
* its code to prohibit unexpected unloading.
*/
if (unlikely(!try_module_get(probed_mod)))
goto fail_with_jump_label;
/*
* If the module freed .init.text, we couldn't insert
* kprobes in there.
*/
/*如果被探测的地址位于模块的init地址段内,但该段代码区间已被释放,则直接退出*/
if (within_module_init((unsigned long)p->addr, probed_mod) &&
probed_mod->state != MODULE_STATE_COMING) {
module_put(probed_mod);
goto fail_with_jump_label;
}
}
preempt_enable();
jump_label_unlock();
p->nmissed = 0;
INIT_LIST_HEAD(&p->list);
mutex_lock(&kprobe_mutex);
jump_label_lock(); /* needed to call jump_label_text_reserved() */
get_online_cpus(); /* For avoiding text_mutex deadlock. */
mutex_lock(&text_mutex);
/*判断在同一个探测点是否已经注册了其他的探测函数*/
old_p = get_kprobe(p->addr);
if (old_p) {
/* Since this may unoptimize old_p, locking text_mutex. */
/*如果已经存在注册过的kprobe,则将探测点的函数修改为aggr_pre_handler,并将所有的handler挂载到其链表上,由其负责所有handler函数的执行*/
ret = register_aggr_kprobe(old_p, p);
goto out;
}
/* 分配特定的内存地址用于保存原有的指令
* 按照内核注释,被分配的地址必须must be on special executable page on x86.
* 该地址被保存在kprobe->ainsn.insn
*/
ret = arch_prepare_kprobe(p);
if (ret)
goto out;
/*将kprobe加入到相应的hash表内*/
INIT_HLIST_NODE(&p->hlist);
hlist_add_head_rcu(&p->hlist,
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
if (!kprobes_all_disarmed && !kprobe_disabled(p))
/*将探测点的指令码修改为int 3指令*/
__arm_kprobe(p);
/* Try to optimize kprobe */
try_to_optimize_kprobe(p);
out:
mutex_unlock(&text_mutex);
put_online_cpus();
jump_label_unlock();
mutex_unlock(&kprobe_mutex);
if (probed_mod)
module_put(probed_mod);
return ret;
fail_with_jump_label:
preempt_enable();
jump_label_unlock();
return -EINVAL;
注册完毕,就开始kprobe的执行流程了。对于该探测点,由于其起始指令已经被修改为int3,因此在执行到该地址时,必然会触发3号中断向量的处理流程do_int3.
点击(此处)折叠或打开
/* May run on IST stack. */
dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
#ifdef CONFIG_KPROBES
/*在这里以DIE_INT3,通知kprobe注册的通知链*/
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
#else
if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
#endif
preempt_conditional_sti(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
preempt_conditional_cli(regs);
}
在do_int3中触发kprobe注册的通知链函数,kprobe_exceptions_notify。由于kprobe以及jprobe等机制的处理核心都在此函数内,这里只针对kprobe的流程进行分析:进入函数的原因是DIE_INT3,并且是第一次进入该函数。
点击(此处)折叠或打开
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct die_args *args = data;
int ret = NOTIFY_DONE;
if (args->regs && user_mode_vm(args->regs))
return ret;
switch (val) {
case DIE_INT3:
/*对于kprobe,进入kprobe_handle*/
if (kprobe_handler(args->regs))
ret = NOTIFY_STOP;
break;
case DIE_DEBUG:
if (post_kprobe_handler(args->regs)) {
/*
* Reset the BS bit in dr6 (pointed by args->err) to
* denote completion of processing
*/
(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
ret = NOTIFY_STOP;
}
break;
case DIE_GPF:
/*
* To be potentially processing a kprobe fault and to
* trust the result from kprobe_running(), we have
* be non-preemptible.
*/
if (!preemptible() && kprobe_running() &&
kprobe_fault_handler(args->regs, args->trapnr))
ret = NOTIFY_STOP;
break;
default:
break;
}
return ret;
}
点击(此处)折叠或打开
static int __kprobes kprobe_handler(struct pt_regs *regs)
{
kprobe_opcode_t *addr;
struct kprobe *p;
struct kprobe_ctlblk *kcb;
/*对于int 3中断,其被Intel定义为Trap,那么异常发生时EIP寄存器内指向的为异常指令的后一条指令*/
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
/*
* We don't want to be preempted for the entire
* duration of kprobe processing. We conditionally
* re-enable preemption at the end of this function,
* and also in reenter_kprobe() and setup_singlestep().
*/
preempt_disable();
kcb = get_kprobe_ctlblk();
/*获取addr对应的kprobe*/
p = get_kprobe(addr);
if (p) {
/*如果异常的进入是由kprobe导致,则进入reenter_kprobe(jprobe需要,到时候分析)*/
if (kprobe_running()) {
if (reenter_kprobe(p, regs, kcb))
return 1;
} else {
set_current_kprobe(p, regs, kcb);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
/*
* If we have no pre-handler or it returned 0, we
* continue with normal processing. If we have a
* pre-handler and it returned non-zero, it prepped
* for calling the break_handler below on re-entry
* for jprobe processing, so get out doing nothing
* more here.
*/
/*执行在此地址上挂载的pre_handle函数*/
if (!p->pre_handler || !p->pre_handler(p, regs))
/*设置单步调试模式,为post_handle函数的执行做准备*/
setup_singlestep(p, regs, kcb, 0);
return 1;
}
} else if (*addr != BREAKPOINT_INSTRUCTION) {
/*
* The breakpoint instruction was removed right
* after we hit it. Another cpu has removed
* either a probepoint or a debugger breakpoint
* at this address. In either case, no further
* handling of this interrupt is appropriate.
* Back up over the (now missing) int3 and run
* the original instruction.
*/
regs->ip = (unsigned long)addr;
preempt_enable_no_resched();
return 1;
} else if (kprobe_running()) {
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
setup_singlestep(p, regs, kcb, 0);
return 1;
}
} /* else: not a kprobe fault; let the kernel handle it */
preempt_enable_no_resched();
return 0;
}
点击(此处)折叠或打开
static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb, int reenter)
{
if (setup_detour_execution(p, regs, reenter))
return;
#if !defined(CONFIG_PREEMPT)
if (p->ainsn.boostable == 1 && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
if (!reenter)
reset_current_kprobe();
/*
* Reentering boosted probe doesn't reset current_kprobe,
* nor set current_kprobe, because it doesn't use single
* stepping.
*/
regs->ip = (unsigned long)p->ainsn.insn;
preempt_enable_no_resched();
return;
}
#endif
/*jprobe*/
if (reenter) {
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
kcb->kprobe_status = KPROBE_REENTER;
} else
kcb->kprobe_status = KPROBE_HIT_SS;
/* Prepare real single stepping */
/*准备单步模式,设置EFLAGS的TF标志位,清楚IF标志位(禁止中断)*/
clear_btf();
regs->flags |= X86_EFLAGS_TF;
regs->flags &= ~X86_EFLAGS_IF;
/* single step inline if the instruction is an int3 */
if (p->opcode == BREAKPOINT_INSTRUCTION)
regs->ip = (unsigned long)p->addr;
else
/*设置异常返回的指令为保存的被探测点的指令*/
regs->ip = (unsigned long)p->ainsn.insn;
}
对应kprobe,pre_handle的执行就结束了,按照代码,程序开始执行保存的被探测点的指令,由于开启了单步调试模式,执行完指令后会继续触发异常,这次的是do_debug异常处理流程。
点击(此处)折叠或打开
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
....
/*在do_debug中,以DIE_DEBUG再一次触发kprobe的通知链*/
if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
SIGTRAP) == NOTIFY_STOP)
return;
....
return;
}
点击(此处)折叠或打开
/*对于kprobe_exceptions_notify,其DIE_DEBUG处理流程*/
case DIE_DEBUG:
if (post_kprobe_handler(args->regs)) {
/*
* Reset the BS bit in dr6 (pointed by args->err) to
* denote completion of processing
*/
(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
ret = NOTIFY_STOP;
}
break;
static int __kprobes post_kprobe_handler(struct pt_regs *regs)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
if (!cur)
return 0;
/*设置异常返回的EIP为下一条需要执行的指令*/
resume_execution(cur, regs, kcb);
/*恢复异常执行前的EFLAGS*/
regs->flags |= kcb->kprobe_saved_flags;
/*执行post_handler函数*/
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
cur->post_handler(cur, regs, 0);
}
/* Restore back the original saved kprobes variables and continue. */
if (kcb->kprobe_status == KPROBE_REENTER) {
restore_previous_kprobe(kcb);
goto out;
}
reset_current_kprobe();
out:
preempt_enable_no_resched();
/*
* if somebody else is singlestepping across a probe point, flags
* will have TF set, in which case, continue the remaining processing
* of do_debug, as if this is not a probe hit.
*/
if (regs->flags & X86_EFLAGS_TF)
return 0;
return 1;
}
至此,一个典型的kprobe的流程已经执行完毕了。
jprobe、kretprobe to be continued...
参考链接: 1)http://www.ibm.com/developerworks/cn/linux/l-cn-systemtap1/index.html 2)http://blog.chinaunix.net/uid-23769728-id-3198044.html 3)http://zfpillar.devebar.net/2008/11/821.html 4)2.6.38 linux kernel(RTFC)
相关文章推荐
- 世界上最差的系统就是linux,双击不能安装软件
- linux awk命令详解
- linux下为php添加curl扩展的方法
- (OK) 调试cBPM—CentOS7—gdb—gdbserver—问题的解决—3—段错误
- Hashcat——Cracking WPA2 WPA with Hashcat in Kali Linux
- (OK) 调试cBPM—CentOS7—gdb—gdbserver—问题的解决—4—段错误
- 2013上半年—Linux操作系统—中考
- arm linux 内核生成过程
- (OK) mkisofs——in linux
- 向日葵远程控制软件发布Linux被控端1.0Beta版
- 2014上半年—Linux操作系统—嵌入式开发—中考
- Skype—set font size —in CentOS/Fedora/RHEL—修改Skype字体大小
- 打造自己的多功能USB启动盘——grub2引导WinPE、Archlinux安装镜像和Ubuntu liveCD
- (OK) 运行cBPM—CentOS7
- jq : Linux下json的命令行工具
- Andriod被排出Linux内核的原因
- centos搭建SmokePing监控IDC丢包延迟
- Linux实时性分析-schedule-调度器
- 2014 红帽 峰会
- 嵌入式linux和嵌入式android系统有什么区别和联系?