x86 kernel 中断分析三——中断处理流程
2017-01-03 15:12
543 查看
CPU检测中断
CPU在执行每条指令之前会检测是否有中断到达,即中断控制器是否有发送中断信号过来。
查找IDT
CPU根据中断向量到IDT中读取对应的中断描述符表项,根据段选择符和偏移确定中断服务程序的地址(见附录1)。
interrupt数组
在分析一中,我们看到,填充IDT中断服务程序的是interrupt数组的内容,所以第2步跳转到interrupt数组对应的表项,表项的内容之前也已分析过:push vector num and jmp to common_interrupt
778 /* 779 * the CPU automatically disables interrupts when executing an IRQ vector, 780 * so IRQ-flags tracing has to follow that: 781 */ 782 .p2align CONFIG_X86_L1_CACHE_SHIFT 783 common_interrupt: 784 ASM_CLAC 785 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ 786 SAVE_ALL 787 TRACE_IRQS_OFF 788 movl %esp,%eax 789 call do_IRQ 790 jmp ret_from_intr 791 ENDPROC(common_interrupt) 792 CFI_ENDPROC
addl $-0x80,(%esp)
根据第一篇分析,此时栈顶是(~vector + 0x80),这里减去0x80,所以值为vector num取反,范围在[-256, -1]。这么做是为了和系统调用区分:正值为系统调用号,负值为中断向量。
SAVE_ALL
保存现场,将所有寄存器的值压栈(cs eip ss esp由系统自动保存)186 .macro SAVE_ALL 187 cld 188 PUSH_GS 189 pushl_cfi %fs 190 /*CFI_REL_OFFSET fs, 0;*/ 191 pushl_cfi %es 192 /*CFI_REL_OFFSET es, 0;*/ 193 pushl_cfi %ds 194 /*CFI_REL_OFFSET ds, 0;*/ 195 pushl_cfi %eax 196 CFI_REL_OFFSET eax, 0 197 pushl_cfi %ebp 198 CFI_REL_OFFSET ebp, 0 199 pushl_cfi %edi 200 CFI_REL_OFFSET edi, 0 201 pushl_cfi %esi 202 CFI_REL_OFFSET esi, 0 203 pushl_cfi %edx 204 CFI_REL_OFFSET edx, 0 205 pushl_cfi %ecx 206 CFI_REL_OFFSET ecx, 0 207 pushl_cfi %ebx 208 CFI_REL_OFFSET ebx, 0 209 movl $(__USER_DS), %edx 210 movl %edx, %ds 211 movl %edx, %es 212 movl $(__KERNEL_PERCPU), %edx 213 movl %edx, %fs 214 SET_KERNEL_GS %edx 215 .endm
movl %esp,%eax
将esp的值赋给eax,eax作为do_IRQ的第一个参数;esp指向以上压栈的寄存器内容,以struct pt_regs指针的形式传过去。
call do_IRQ
175 /* 176 * do_IRQ handles all normal device IRQ's (the special 177 * SMP cross-CPU interrupts have their own specific 178 * handlers). 179 */ 180 __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) 181 { 182 struct pt_regs *old_regs = set_irq_regs(regs); 183 184 /* high bit used in ret_from_ code */ 185 unsigned vector = ~regs->orig_ax; //获取向量号,这里有一个取反的操作,与之前的取反相对应得到正的向量号 186 unsigned irq; 187 188 irq_enter(); 189 exit_idle(); 190 191 irq = __this_cpu_read(vector_irq[vector]); //通过向量号得到中断号 192 193 if (!handle_irq(irq, regs)) { 194 ack_APIC_irq(); 195 196 if (irq != VECTOR_RETRIGGERED) { 197 pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n", 198 __func__, smp_processor_id(), 199 vector, irq); 200 } else { 201 __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); 202 } 203 } 204 205 irq_exit(); 206 207 set_irq_regs(old_regs); 208 return 1; 209 }
irq_enter
319 /* 320 * Enter an interrupt context. //进入中断上下文,因为首先处理的是硬中断,所以我们可以把irq_enter认为是硬中断的开始 321 */ 322 void irq_enter(void) 323 { 324 rcu_irq_enter(); //inform RCU that current CPU is entering irq away from idle 325 if (is_idle_task(current) && !in_interrupt()) { //如果当前是pid==0的idle task并且不处于中断上下文中 326 /* 327 * Prevent raise_softirq from needlessly waking up ksoftirqd 328 * here, as softirq will be serviced on return from interrupt. 329 */ 330 local_bh_disable(); 331 tick_irq_enter(); //idle进程会被中断或者其他进程抢占,在系统中断过程中用irq_enter->tick_irq_enter()恢复周期性tick以得到正确的jiffies值(这段注释摘录自http://blog.chinaunix.net/uid-29675110-id-4365095.html) 332 _local_bh_enable(); 333 } 334 335 __irq_enter(); 336 }
__irq_enter
28 /* 29 * It is safe to do non-atomic ops on ->hardirq_context, 30 * because NMI handlers may not preempt and the ops are 31 * always balanced, so the interrupted value of ->hardirq_context 32 * will always be restored. 33 */ 34 #define __irq_enter() \ 35 do { \ 36 account_irq_enter_time(current); \ 37 preempt_count_add(HARDIRQ_OFFSET); \ //HARDIRQ_OFFSET等于1左移16位,即将preempt_count第16 bit加1,preempt_count的格式见附录 38 trace_hardirq_enter(); \ 39 } while (0)
exit_idle
如果系统正处在idle状态,那么退出IDLE258 /* Called from interrupts to signify idle end */ 259 void exit_idle(void) 260 { 261 /* idle loop has pid 0 */ //如果当前进程不为0,直接退出,不需要退出 idle 262 if (current->pid) 263 return; 264 __exit_idle(); //如果是idle进程,那么通过__exit_idle调用一系列notification 265 }
handle_irq
165 bool handle_irq(unsigned irq, struct pt_regs *regs) 166 { 167 struct irq_desc *desc; 168 int overflow; 169 170 overflow = check_stack_overflow(); //x86架构下如果sp指针距离栈底的位置小于1KB,则认为有stack overflow的风险 171 172 desc = irq_to_desc(irq); //获取desc,从刚开始的vector num-->irq num--> desc 173 if (unlikely(!desc)) 174 return false; 175 //如果发生中断时,CPU正在执行用户空间的代码,处理中断需切换到内核栈,但此时内核栈是空的,所以无需再切换到中断栈 176 if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) { // 在CPU的irq stack执行,否则在当前进程的栈执行,调用下面的desc->handle_irq 177 if (unlikely(overflow)) 178 print_stack_overflow(); 179 desc->handle_irq(irq, desc); 180 } 181 182 return true; 183 }
中断栈的定义及初始化
按照目前的内核设计,中断有自己的栈,用来执行中断服务程序,这样是为了防止中断嵌套破坏与之共享的进程内核栈。中断栈的定义如下,可以看到与进程上下文的布局相同:thread info + stack
58 /* 59 * per-CPU IRQ handling contexts (thread information and stack) 60 */ 61 union irq_ctx { 62 struct thread_info tinfo; 63 u32 stack[THREAD_SIZE/sizeof(u32)]; 64 } __attribute__((aligned(THREAD_SIZE)));
中断栈的初始化:
创建percpu变量hardirq_ctx和softirq_ctx,类型为指向union irq_ctx的指针,所以每个cpu的软、硬中断有各自的stack
66 static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); 67 static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
native_init_IRQ->irq_ctx_init
hardirq_ctx和softirq_ctx的初始化方式相同,如下
116 /* 117 * allocate per-cpu stacks for hardirq and for softirq processing 118 */ 119 void irq_ctx_init(int cpu) 120 { 121 union irq_ctx *irqctx; 122 123 if (per_cpu(hardirq_ctx, cpu)) 124 return; 125 126 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), //分配2个page 127 THREADINFO_GFP, 128 THREAD_SIZE_ORDER)); 129 memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); //初始化其中的部分成员 130 irqctx->tinfo.cpu = cpu; 131 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); 132 133 per_cpu(hardirq_ctx, cpu) = irqctx; //赋值给hardirq_ctx 134 135 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), 136 THREADINFO_GFP, 137 THREAD_SIZE_ORDER)); 138 memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); 139 irqctx->tinfo.cpu = cpu; 140 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); 141 142 per_cpu(softirq_ctx, cpu) = irqctx; 143 144 printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", 145 cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); 146 }
网上找的一张图,如下
中断栈的切换
发生中断时需要从当前进程栈切换到中断栈80 static inline int 81 execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) 82 { 83 union irq_ctx *curctx, *irqctx; 84 u32 *isp, arg1, arg2; 85 86 curctx = (union irq_ctx *) current_thread_info(); //获取当前进程的process context,即栈的起始地址 87 irqctx = __this_cpu_read(hardirq_ctx); //获取硬中断的hardirq context,即栈的起始地址 88 89 /* 90 * this is where we switch to the IRQ stack. However, if we are 91 * already using the IRQ stack (because we interrupted a hardirq 92 * handler) we can't do that and just have to keep using the 93 * current stack (which is the irq stack already after all) 94 */ 95 if (unlikely(curctx == irqctx)) //如果当前进程的栈和中断栈相同,说明发生了中断嵌套,此时当前进程就是一个中断的服务例程 96 return 0; //这种情况下不能进行栈的切换,还是在当前栈中运行,只要返回0即可 97 98 /* build the stack frame on the IRQ stack */ 99 isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); //获取中断栈的isp 100 irqctx->tinfo.task = curctx->tinfo.task; //获取当前进程的task和stack point 101 irqctx->tinfo.previous_esp = current_stack_pointer; 102 103 if (unlikely(overflow)) 104 call_on_stack(print_stack_overflow, isp); 105 106 asm volatile("xchgl %%ebx,%%esp \n" //具体的栈切换发生在以下汇编中,基本上就是保存现场,进行切换,不深入研究汇编了... 107 "call *%%edi \n" 108 "movl %%ebx,%%esp \n" 109 : "=a" (arg1), "=d" (arg2), "=b" (isp) 110 : "0" (irq), "1" (desc), "2" (isp), 111 "D" (desc->handle_irq) //不管是共享栈还是独立栈,最后都会调用到irq desc对应的handle_irq 112 : "memory", "cc", "ecx"); 113 return 1; 114 }
handle_level_irq
kernel中对于中断有一系列的中断流处理函数:handle_simple_irq 用于简易流控处理; handle_level_irq 用于电平触发中断的流控处理; handle_edge_irq 用于边沿触发中断的流控处理; handle_fasteoi_irq 用于需要响应eoi的中断控制器; handle_percpu_irq 用于只在单一cpu响应的中断; handle_nested_irq 用于处理使用线程的嵌套中断。
我们在第二篇分析中看到,init_ISA_irqs把legacy irq的中断流处理函数都设置为handle_level_irq,下面以此为例做分析:
//level type中断,当硬件中断line的电平处于active level时就一直保持有中断请求,这就要求处理中断过程中屏蔽中断,响应硬件后打开中断 387 /** 388 * handle_level_irq - Level type irq handler //电平触发的中断处理函数 389 * @irq: the interrupt number 390 * @desc: the interrupt description structure for this irq 391 * 392 * Level type interrupts are active as long as the hardware line has 393 * the active level. This may require to mask the interrupt and unmask 394 * it after the associated handler has acknowledged the device, so the 395 * interrupt line is back to inactive. 396 */ 397 void 398 handle_level_irq(unsigned int irq, struct irq_desc *desc) 399 { 400 raw_spin_lock(&desc->lock); //上锁 401 mask_ack_irq(desc); //mask对应的中断,否则一直接收来自interrupt line的中断信号 402 403 if (unlikely(irqd_irq_inprogress(&desc->irq_data))) //如果该中断正在其他cpu上被处理 404 if (!irq_check_poll(desc)) //这边不是很理解,irq的IRQS_POLL_INPROGRESS(polling in a progress)是什么意思?只能等后续代码遇到这个宏的时候再说。如果是在该状态,cpu relax,等待完成 405 goto out_unlock; //直接解锁退出 406 //清除IRQS_REPLAY和IRQS_WAITING标志位 407 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); 408 kstat_incr_irqs_this_cpu(irq, desc); //该CPU上该irq触发次数加1,总的中断触发次数加1 409 410 /* 411 * If its disabled or no action available 412 * keep it masked and get out of here 413 */ 414 if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { 415 desc->istate |= IRQS_PENDING; //设置为pending 416 goto out_unlock; 417 } 418 419 handle_irq_event(desc); //核心函数 420 421 cond_unmask_irq(desc); //使能中断线 422 423 out_unlock: 424 raw_spin_unlock(&desc->lock); 425 } 426 EXPORT_SYMBOL_GPL(handle_level_irq);
handle_irq_event
182 irqreturn_t handle_irq_event(struct irq_desc *desc) 183 { 184 struct irqaction *action = desc->action; //获取irqaction链表 185 irqreturn_t ret; 186 187 desc->istate &= ~IRQS_PENDING; //正式进入处理流程,清除irq desc的pending标志位 188 irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); //处理中断前设置IRQD_IRQ_INPROGRESS标志 189 raw_spin_unlock(&desc->lock); 190 191 ret = handle_irq_event_percpu(desc, action); 192 193 raw_spin_lock(&desc->lock); 194 irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); //处理中断后清除IRQD_IRQ_INPROGRESS标志 195 return ret; 196 }
handle_irq_event_percpu
132 irqreturn_t 133 handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) 134 { 135 irqreturn_t retval = IRQ_NONE; 136 unsigned int flags = 0, irq = desc->irq_data.irq; 137 138 do { 139 irqreturn_t res; 140 141 trace_irq_handler_entry(irq, action); 142 res = action->handler(irq, action->dev_id); //调用硬中断处理函数 143 trace_irq_handler_exit(irq, action, res); 144 145 if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n", 146 irq, action->handler)) 147 local_irq_disable(); 148 149 switch (res) { 150 case IRQ_WAKE_THREAD: //线程化中断的硬中断,通常只是响应一下硬件ack,就返会IRQ_WAKE_THREAD,唤醒软中断线程 151 /* 152 * Catch drivers which return WAKE_THREAD but 153 * did not set up a thread function 154 */ 155 if (unlikely(!action->thread_fn)) { 156 warn_no_thread(irq, action); 157 break; 158 } 159 160 irq_wake_thread(desc, action); //唤醒软中断线程 161 162 /* Fall through to add to randomness */ 163 case IRQ_HANDLED: //表示已经在硬中断中处理完毕 164 flags |= action->flags; 165 break; 166 167 default: 168 break; 169 } 170 171 retval |= res; 172 action = action->next; //对于共享中断,所有irqaction挂在同一desc下 173 } while (action); 174 175 add_interrupt_randomness(irq, flags); //这块代码其实和中断流程的关系不大,利用用户和外设作为噪声源,为内核随机熵池做贡献....(http://jingpin.jikexueyuan.com/article/23923.html) 176 177 if (!noirqdebug) 178 note_interrupt(irq, desc, retval); 179 return retval; 180 }
以上就是中断处理流程的简要分析。有个问题:irq_desc中action的handler及线程化的软中断从何而来?下篇分析见。
附录1:
CPU使用IDT查到的中断服务程序的段选择符从GDT中取得相应的段描述符,段描述符里保存了中断服务程序的段基址和属性信息,此时CPU就得到了中断服务程序的起始地址。这里,CPU会比较当前cs寄存器里的CPL和GDT中段描述符的DPL,以确保中断服务程序的特权级不低于当前程序;如果这次中断是编程异常(如:int 80h系统调用),那么还要检查CPL和IDT表中中断描述符的DPL,以保证当前程序有权限使用中断服务程序,这可以避免用户应用程序访问特殊的陷阱门和中断门[3]。下图显示了从中断向量到GDT中相应中断服务程序起始位置的定位方式:
附录2. preempt_count:
44 #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) // 1左移16位 32 #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) // 8 + 8 = 16 31 #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) // 0 + 8 = 8 30 #define PREEMPT_SHIFT 0 25 #define PREEMPT_BITS 8 26 #define SOFTIRQ_BITS 8 2500 void __kprobes preempt_count_add(int val) 2501 { 2502 #ifdef CONFIG_DEBUG_PREEMPT 2503 /* 2504 * Underflow? 2505 */ 2506 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) 2507 return; 2508 #endif 2509 __preempt_count_add(val); //除去debug相关的内容,只有这一行关键代码,将preempt_count中第16 bit加1 2510 #ifdef CONFIG_DEBUG_PREEMPT 2511 /* 2512 * Spinlock count overflowing soon? 2513 */ 2514 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= 2515 PREEMPT_MASK - 10); 2516 #endif 2517 if (preempt_count() == val) 2518 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); 2519 } 2520 EXPORT_SYMBOL(preempt_count_add);
preempt_count的布局如下:
相关文章推荐
- arm-Linux中断处理体系结构与处理流程分析
- 2440 ads启动代码的中断处理流程分析
- 自己动手写操作系统 第六章 号外:中断处理流程详细分析
- linux内核分析--中断处理流程
- 中断处理流程分析
- kernel 3.10内核源码分析--中断--中断和异常返回流程
- arm linux 下中断流程简要分析--中断处理流程
- 中断处理流程分析
- x86 kernel 中断机制分析一——IDT
- x86 kernel 中断机制分析二——irq_desc
- Exynos4412 中断驱动开发(二)—— 中断处理流程分析
- Exynos4412 中断驱动开发(二)—— 中断处理流程分析
- ARM中断处理流程的分析
- ARM中断处理流程的分析
- uc/os软件中断与硬件中断处理流程分析
- 网络处理的软中断机制分析
- S3C2410 && WinCE6.0的中断处理分析
- S3C2410 && WinCE6.0的中断处理分析
- 2410 中断过程处理分析
- 三读内核中断处理(3):中断处理流程