您的位置:首页 > 运维架构 > Linux

linux内核之中断实现原理

2017-06-09 00:00 543 查看

前言

本篇文章主要用来说明以下几个问题:

什么是中断,中断有哪些分类?

中断事件是如何从事件源一步步传递到内核层并调用相应的中断处理函数的?

内核启动过程中,中断系统是如何初始化的?

内核中,中断申请和释放的内部原理是什么?

注:本篇文章所使用源码依然基于ARM平台,内核版本为4.8.0。

中断

中断是一种cpu的异步事件,它会打断cpu的正常执行逻辑,强制cpu保存当前上下文(也可能手动保存)之后跳转到中断处理函数中进行中断处理。不同的cpu(即使是相同架构的cpu)一般会有不同的中断体系,比如具有不同的中断源的种类和数量等。中断和异常是有区别的,中断一般指的是外部的硬件中断和软件中断,而异常一般包含了中断,但是异常还包含了诸如预取指令异常等其他异常事件。异常在cpu中一般会以异常向量表的形式存在程序启动的最前面,比如复位向量就是cpu在上电或者复位的时候执行的代码入口,它一般在汇编层次完成cpu外设的基本初始化以及为接下来的c语言准备运行环境。

下面是ARM架构体系中的比较经典的异常向量表:



其中,swi就是软中断,linux内核中主要使用它来实现系统调用,具体可以参见我的另一篇文章:从glibc源码看系统调用原理 ,IRQ/FIQ就是硬件中断,IRQ/FIQ的区别不属于本文讨论的范畴。

内核中断

前面我提到了中断向量表,它是所有中断的入口,在linux内核中,向量表的定义在/arch/arm/kernel/entry-armv.S中:

@ ARM exception vector table: one entry per exception, in hardware vector order.
.L__vectors_start:
@ 0x00: reset
W(b)	vector_rst
@ 0x04: undefined instruction
W(b)	vector_und
@ 0x08: swi (software interrupt); pc is loaded from the word stored 0x1000 bytes past the start of this table
W(ldr)	pc, .L__vectors_start + 0x1000
@ 0x0c: prefetch abort
W(b)	vector_pabt
@ 0x10: data abort
W(b)	vector_dabt
@ 0x14: address exception
W(b)	vector_addrexcptn
@ 0x18: IRQ
W(b)	vector_irq
@ 0x1c: FIQ
W(b)	vector_fiq

本文主要关注硬件中断的处理逻辑,因为这是内核驱动开发时使用最多的中断(比如键盘中断、USB中断等),irq对应的中断处理逻辑为vector_irq:

/*
* Interrupt dispatcher
*/
vector_stub	irq, IRQ_MODE, 4

@ Table of 16 handler addresses, indexed 0..f as annotated on each line.
@ Only slots 0 and 3 carry real handlers; every other slot is __irq_invalid.
.long	__irq_usr			@  0  (USR_26 / USR_32) handler for an IRQ taken from user mode
.long	__irq_invalid			@  1  (FIQ_26 / FIQ_32)
.long	__irq_invalid			@  2  (IRQ_26 / IRQ_32)
.long	__irq_svc			@  3  (SVC_26 / SVC_32) handler for an IRQ taken from SVC mode
.long	__irq_invalid			@  4
.long	__irq_invalid			@  5
.long	__irq_invalid			@  6
.long	__irq_invalid			@  7
.long	__irq_invalid			@  8
.long	__irq_invalid			@  9
.long	__irq_invalid			@  a
.long	__irq_invalid			@  b
.long	__irq_invalid			@  c
.long	__irq_invalid			@  d
.long	__irq_invalid			@  e
.long	__irq_invalid			@  f

可见,由于中断发生时,当前代码可以处于用户态或内核态,因此需要区别对待,本文主要讨论从用户态进入中断的逻辑,也就是__irq_usr:

@ Entry point for an IRQ taken while in user mode (slot 0 of the table that
@ follows "vector_stub irq").
.align	5
__irq_usr:
@ presumably saves the user-mode register frame - TODO confirm against usr_entry
usr_entry
kuser_cmpxchg_check
@ dispatch the interrupt through the irq_handler macro
irq_handler
get_thread_info tsk
mov	why, #0
@ leave through the return-to-user path
b	ret_to_user_from_irq
UNWIND(.fnend		)
ENDPROC(__irq_usr)

__irq_usr 中会调用irq_handler进行进一步的中断处理,下面看一下irq_handler定义:

/*
* Interrupt handling.
*
* With CONFIG_MULTI_IRQ_HANDLER the macro jumps through the pointer stored in
* handle_arch_irq, passing sp in r0 and setting lr to local label 9997 so the
* callee returns to the end of the macro. Otherwise it expands
* arch_irq_handler_default.
*/
.macro	irq_handler
#ifdef CONFIG_MULTI_IRQ_HANDLER
@ r1 = address of the handle_arch_irq variable
ldr	r1, =handle_arch_irq
@ r0 = current stack pointer (first argument of the handler)
mov	r0, sp
@ return address = label 9997 below
badr	lr, 9997f
@ jump to the function whose address is stored at [r1]
ldr	pc, [r1]
#else
arch_irq_handler_default
#endif
9997:
.endm

在没有定义CONFIG_MULTI_IRQ_HANDLER的情况下,irq_handler会调用arch_irq_handler_default(位于/arch/arm/include/asm/entry-macro-multi.S):

/*
* Interrupt handling.  Preserves r7, r8, r9
*/
.macro	arch_irq_handler_default
get_irqnr_preamble r6, lr
1:	get_irqnr_and_base r0, r2, r6, lr
movne	r1, sp
@
@ routine called with r0 = irq number, r1 = struct pt_regs *
@
badrne	lr, 1b
bne	asm_do_IRQ

arch_irq_handler_default最终会调用asm_do_IRQ(调用之前分别把中断号赋值给r0寄存器,把pt_regs寄存器地址赋值给r1寄存器,本质就是在向asm_do_IRQ传递参数),这个函数非常特殊,因为它是用c语言编写的,也就是中断处理从asm_do_IRQ开始便进入c语言的世界,它定义在/arch/arm/kernel/irq.c:

/*
* asm_do_IRQ is the interface to be used from assembly code.
*
* @irq:  interrupt number, forwarded unchanged to handle_IRQ()
* @regs: register frame pointer, forwarded unchanged to handle_IRQ()
*/
asmlinkage void __exception_irq_entry
asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
handle_IRQ(irq, regs);
}

继续看handle_IRQ实现:

/*
* handle_IRQ handles all hardware IRQ's.  Decoded IRQs should
* not come via this function.  Instead, they should provide their
* own 'handler'.  Used by platform code implementing C-based 1st
* level decoding.
*
* Calls __handle_domain_irq() with a NULL domain and lookup == false.
*/
void handle_IRQ(unsigned int irq, struct pt_regs *regs)
{
__handle_domain_irq(NULL, irq, false, regs);
}

在/kernel/irq/irqdesc.c中,__handle_domain_irq定义如下:

/**
* __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain
* @domain:	The domain where to perform the lookup
* @hwirq:	The HW irq number to convert to a logical one
* @lookup:	Whether to perform the domain lookup or not
* @regs:	Register file coming from the low-level handling code
*
* Returns:	0 on success, or -EINVAL if conversion has failed
*/
int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
bool lookup, struct pt_regs *regs)
{
// Install regs as the current IRQ register set and remember the previous one
struct pt_regs *old_regs = set_irq_regs(regs);
// Without a lookup the hardware number is used directly as the irq number
unsigned int irq = hwirq;
int ret = 0;

irq_enter();

#ifdef CONFIG_IRQ_DOMAIN
if (lookup)
irq = irq_find_mapping(domain, hwirq);
#endif

/*
* Some hardware gives randomly wrong interrupts.  Rather
* than crashing, do something sensible.
*/
if (unlikely(!irq || irq >= nr_irqs)) {
ack_bad_irq(irq);
ret = -EINVAL;
} else {
// Normal path: dispatch the interrupt
generic_handle_irq(irq);
}

irq_exit();
// Put back the register set that was current before this interrupt
set_irq_regs(old_regs);
return ret;
}

调用generic_handle_irq进行进一步处理:

/**
* generic_handle_irq - Invoke the handler for a particular irq
* @irq:	The irq number to handle
*
* Returns 0 after invoking the handler, or -EINVAL when no descriptor
* exists for @irq.
*/
int generic_handle_irq(unsigned int irq)
{
// Look up the irq_desc for this interrupt number
struct irq_desc *desc = irq_to_desc(irq);

if (!desc)
return -EINVAL;
// Hand the descriptor over for further handling
generic_handle_irq_desc(desc);
return 0;
}
EXPORT_SYMBOL_GPL(generic_handle_irq);

其中,irq_to_desc就是根据中断号拿到对应的irq_desc结构:

/*
* Map an interrupt number to its descriptor in the static irq_desc[] array.
* Returns NULL when irq is not below NR_IRQS.
*/
struct irq_desc *irq_to_desc(unsigned int irq)
{
return (irq < NR_IRQS) ? irq_desc + irq : NULL;
}
EXPORT_SYMBOL(irq_to_desc);

irq_desc是一个struct irq_desc结构体数组(数组大小为NR_IRQS):

/*
* Static table of NR_IRQS descriptors. Every entry starts out with
* handle_bad_irq as its flow handler, a disable depth of 1 and an
* unlocked raw spinlock.
*/
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS-1] = {
.handle_irq	= handle_bad_irq,
.depth		= 1,
.lock		= __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
}
};

回来继续看generic_handle_irq_desc,它只是调用了irq_desc结构的handle_irq:

/*
* Architectures call this to let the generic IRQ layer
* handle an interrupt.
*
* Simply invokes the flow handler stored in desc->handle_irq.
*/
static inline void generic_handle_irq_desc(struct irq_desc *desc)
{
desc->handle_irq(desc);
}

irq_desc就是中断描述符,它定义在/include/linux/irqdesc.h,后文会列出它的详细定义,此处要说明的是,handle_irq并不是最终的中断处理函数,它是“highlevel irq-events handler”即高层次的中断事件处理函数(类型为typedef void (*irq_flow_handler_t)(struct irq_desc *desc)),此类函数有:处理电平触发类型的中断handler(handle_level_irq)、处理边缘触发类型的中断handler(handle_edge_irq)、处理简单类型的中断handler(handle_simple_irq)、处理EOI类型的中断handler(handle_fasteoi_irq),下面是handle_edge_irq的源码(/kernel/irq/chip.c):

/**
*	handle_edge_irq - edge type IRQ handler
*	@desc:	the interrupt description structure for this irq
*
*	Interrupt occurs on the falling and/or rising edge of a hardware
*	signal. The occurrence is latched into the irq controller hardware
*	and must be acked in order to be reenabled. After the ack another
*	interrupt can happen on the same source even before the first one
*	is handled by the associated event handler. If this happens it
*	might be necessary to disable (mask) the interrupt depending on the
*	controller hardware. This requires to reenable the interrupt inside
*	of the loop which handles the interrupts which have arrived while
*	the handler was running. If all pending interrupts are handled, the
*	loop is left.
*/
void handle_edge_irq(struct irq_desc *desc)
{
raw_spin_lock(&desc->lock);

desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);

/* Cannot run now: record it as pending, mask+ack and leave */
if (!irq_may_run(desc)) {
desc->istate |= IRQS_PENDING;
mask_ack_irq(desc);
goto out_unlock;
}

/*
* If its disabled or no action available then mask it and get
* out of here.
*/
if (irqd_irq_disabled(&desc->irq_data) || !desc->action) {
desc->istate |= IRQS_PENDING;
mask_ack_irq(desc);
goto out_unlock;
}

kstat_incr_irqs_this_cpu(desc);

/* Start handling the irq; unlike handle_level_irq this only acks, it does not mask */
desc->irq_data.chip->irq_ack(&desc->irq_data);

/* Keep handling for as long as IRQS_PENDING is set again and the irq is not disabled */
do {
if (unlikely(!desc->action)) {
mask_irq(desc);
goto out_unlock;
}

/*
* When another irq arrived while we were handling
* one, we could have masked the irq.
* Renable it, if it was not disabled in meantime.
*/
if (unlikely(desc->istate & IRQS_PENDING)) {
if (!irqd_irq_disabled(&desc->irq_data) &&
irqd_irq_masked(&desc->irq_data))
unmask_irq(desc);
}

handle_irq_event(desc);

} while ((desc->istate & IRQS_PENDING) &&
!irqd_irq_disabled(&desc->irq_data));

out_unlock:
raw_spin_unlock(&desc->lock);
}
EXPORT_SYMBOL(handle_edge_irq);

为了与handle_edge_irq对比,也把handle_level_irq的代码贴出来,可以看出他们的区别:

/**
*	handle_level_irq - Level type irq handler
*	@desc:	the interrupt description structure for this irq
*
*	Level type interrupts are active as long as the hardware line has
*	the active level. This may require to mask the interrupt and unmask
*	it after the associated handler has acknowledged the device, so the
*	interrupt line is back to inactive.
*/
void handle_level_irq(struct irq_desc *desc)
{
raw_spin_lock(&desc->lock);
/* Mask and ack unconditionally before anything else */
mask_ack_irq(desc);

if (!irq_may_run(desc))
goto out_unlock;

desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);

/*
* If its disabled or no action available
* keep it masked and get out of here
*/
if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
desc->istate |= IRQS_PENDING;
goto out_unlock;
}

kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);

/* Handlers have run; conditionally unmask the line again */
cond_unmask_irq(desc);

out_unlock:
raw_spin_unlock(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_level_irq);

高层的中断处理函数会继续调用handle_irq_event(/kernel/irq/handle.c):

/*
* Run the action handlers for desc. Entered with desc->lock held: the lock is
* dropped while the handlers run and taken again before returning.
* IRQD_IRQ_INPROGRESS is set for the duration and IRQS_PENDING is cleared
* up front.
*/
irqreturn_t handle_irq_event(struct irq_desc *desc)
{
irqreturn_t ret;

desc->istate &= ~IRQS_PENDING;
irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
raw_spin_unlock(&desc->lock);
// Run the handlers with desc->lock released
ret = handle_irq_event_percpu(desc);

raw_spin_lock(&desc->lock);
irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
return ret;
}

继续看handle_irq_event_percpu:

/*
* Wrapper around __handle_irq_event_percpu(): runs the handlers, feeds the
* irq number and collected flags to add_interrupt_randomness(), and calls
* note_interrupt() unless noirqdebug is set. Returns the handlers' result.
*/
irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
{
irqreturn_t retval;
unsigned int flags = 0;
// Run the action handlers; flags is filled in by the callee
retval = __handle_irq_event_percpu(desc, &flags);

add_interrupt_randomness(desc->irq_data.irq, flags);

if (!noirqdebug)
note_interrupt(desc, retval);
return retval;
}

__handle_irq_event_percpu会调用用户设置的真正的中断处理函数:

/*
* Call the handler of every action attached to desc.
*
* @desc:  descriptor whose actions are run
* @flags: out parameter; action->flags is OR-ed in for each action whose
*         handler returned IRQ_HANDLED, or IRQ_WAKE_THREAD with a thread_fn
*
* Returns the OR of all handler return values (IRQ_NONE if there were none).
*/
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags)
{
irqreturn_t retval = IRQ_NONE;
unsigned int irq = desc->irq_data.irq;
struct irqaction *action;

// Walk every action attached to this descriptor
for_each_action_of_desc(desc, action) {
irqreturn_t res;

trace_irq_handler_entry(irq, action);
// Call the handler that was registered for this action
res = action->handler(irq, action->dev_id);
trace_irq_handler_exit(irq, action, res);

// A handler must not return with interrupts enabled; warn once and disable them again
if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
irq, action->handler))
local_irq_disable();

switch (res) {
case IRQ_WAKE_THREAD:
/*
* Catch drivers which return WAKE_THREAD but
* did not set up a thread function
*/
if (unlikely(!action->thread_fn)) {
warn_no_thread(irq, action);
break;
}

__irq_wake_thread(desc, action);

/* Fall through to add to randomness */
case IRQ_HANDLED:
*flags |= action->flags;
break;

default:
break;
}

retval |= res;
}

return retval;
}

至此,一次完整的中断事件传递流程就完成了(硬件中断事件-->内核--> 最终的中断处理函数)。

中断相关的数据结构

首先,也是最重要数据结构--中断描述符irq_desc,由于该结构有非常详细的英文注释,本文就不作过多介绍:

/**
* struct irq_desc - interrupt descriptor
* @irq_common_data:	per irq and chip data passed down to chip functions
* @kstat_irqs:		irq stats per cpu
* @handle_irq:		highlevel irq-events handler
* @preflow_handler:	handler called before the flow handler (currently used by sparc)
* @action:		the irq action chain
* @status:		status information
* @core_internal_state__do_not_mess_with_it: core internal status information
* @depth:		disable-depth, for nested irq_disable() calls
* @wake_depth:		enable depth, for multiple irq_set_irq_wake() callers
* @irq_count:		stats field to detect stalled irqs
* @last_unhandled:	aging timer for unhandled count
* @irqs_unhandled:	stats field for spurious unhandled interrupts
* @threads_handled:	stats field for deferred spurious detection of threaded handlers
* @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
* @lock:		locking for SMP
* @affinity_hint:	hint to user space for preferred irq affinity
* @affinity_notify:	context for notification of affinity changes
* @pending_mask:	pending rebalanced interrupts
* @threads_oneshot:	bitfield to handle shared oneshot threads
* @threads_active:	number of irqaction threads currently running
* @wait_for_threads:	wait queue for sync_irq to wait for threaded handlers
* @nr_actions:		number of installed actions on this descriptor
* @no_suspend_depth:	number of irqactions on a irq descriptor with
*			IRQF_NO_SUSPEND set
* @force_resume_depth:	number of irqactions on a irq descriptor with
*			IRQF_FORCE_RESUME set
* @rcu:		rcu head for delayed free
* @dir:		/proc/irq/ procfs entry
* @name:		flow handler name for /proc/interrupts output
*
* NOTE(review): several members declared below have no entry in this list
* (irq_data, percpu_enabled, percpu_affinity, cond_suspend_depth, parent_irq,
* owner), and @status presumably refers to status_use_accessors - confirm.
*/
struct irq_desc {
struct irq_common_data	irq_common_data;
struct irq_data		irq_data;
unsigned int __percpu	*kstat_irqs;
irq_flow_handler_t	handle_irq;
#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
irq_preflow_handler_t	preflow_handler;
#endif
struct irqaction	   *action;	/* IRQ action list */
unsigned int		status_use_accessors;
unsigned int		core_internal_state__do_not_mess_with_it;
unsigned int		depth;		/* nested irq disables */
unsigned int		wake_depth;	/* nested wake enables */
unsigned int		irq_count;	/* For detecting broken IRQs */
unsigned long		last_unhandled;	/* Aging timer for unhandled count */
unsigned int		irqs_unhandled;
atomic_t		threads_handled;
int			threads_handled_last;
raw_spinlock_t		lock;
struct cpumask		*percpu_enabled;
const struct cpumask	*percpu_affinity;
#ifdef CONFIG_SMP
const struct cpumask	*affinity_hint;
struct irq_affinity_notify *affinity_notify;
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_var_t		pending_mask;
#endif
#endif
unsigned long		threads_oneshot;
atomic_t		threads_active;
wait_queue_head_t       wait_for_threads;
#ifdef CONFIG_PM_SLEEP
unsigned int		nr_actions;
unsigned int		no_suspend_depth;
unsigned int		cond_suspend_depth;
unsigned int		force_resume_depth;
#endif
#ifdef CONFIG_PROC_FS
struct proc_dir_entry	*dir;
#endif
#ifdef CONFIG_SPARSE_IRQ
struct rcu_head		rcu;
#endif
int			parent_irq;
struct module		*owner;
const char		*name;
} ____cacheline_internodealigned_in_smp;

irq_desc中有一个action成员,它是struct irqaction 类型指针,最终指向struct irqaction链表, struct irqaction定义如下:

/**
* struct irqaction - per interrupt action descriptor
* @handler:	interrupt handler function
* @name:	name of the device
* @dev_id:	cookie to identify the device
* @percpu_dev_id:	cookie to identify the device
* @next:	pointer to the next irqaction for shared interrupts
* @irq:	interrupt number
* @flags:	flags (see IRQF_* above)
* @thread_fn:	interrupt handler function for threaded interrupts
* @thread:	thread pointer for threaded interrupts
* @secondary:	pointer to secondary irqaction (force threading)
* @thread_flags:	flags related to @thread
* @thread_mask:	bitmask for keeping track of @thread activity
* @dir:	pointer to the proc/irq/NN/name entry
*/
struct irqaction {
irq_handler_t		handler;/* handler function registered by the driver */
void			*dev_id;/* private cookie supplied by the driver */
void __percpu		*percpu_dev_id;
struct irqaction     	*next; /* links the actions into a list */
irq_handler_t		thread_fn;
struct task_struct	*thread;
struct irqaction	*secondary;
unsigned int		irq;       /* interrupt number */
unsigned int		flags;       /* IRQF_* flags */
unsigned long		thread_flags;
unsigned long		thread_mask;
const char		*name;
struct proc_dir_entry	*dir;
} ____cacheline_internodealigned_in_smp;

irq_desc中另一个比较重要的成员为:irq_data,类型为struct irq_data,它主要用来封装硬件架构相关的中断操作,其定义如下:

/**
* struct irq_data - per irq chip data passed down to chip functions
* @mask:		precomputed bitmask for accessing the chip registers
* @irq:		interrupt number
* @hwirq:		hardware interrupt number, local to the interrupt domain
* @common:		point to data shared by all irqchips
* @chip:		low level interrupt hardware access
* @domain:		Interrupt translation domain; responsible for mapping
*			between hwirq number and linux irq number.
* @parent_data:	pointer to parent struct irq_data to support hierarchy
*			irq_domain
* @chip_data:		platform-specific per-chip private data for the chip
*			methods, to allow shared chip implementations
*/
struct irq_data {
u32			mask;
unsigned int		irq;
unsigned long		hwirq;
struct irq_common_data	*common;
struct irq_chip		*chip;      /* hardware-specific interrupt operations */
struct irq_domain	*domain;
#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
struct irq_data		*parent_data;
#endif
void			*chip_data;
};

irq_data中的chip成员和具体的硬件处理器有关,中断的打开、关闭以及中断标志位的设置、清除等都是和具体的硬件cpu相关的,因此为了提高框架层次性,内核将中断的硬件操作全部抽象在struct irq_chip结构中,该结构定义在/include/linux/irq.h:

/**
* struct irq_chip - hardware interrupt chip descriptor
*
* @parent_device:	pointer to parent device for irqchip
* @name:		name for /proc/interrupts
* @irq_startup:	start up the interrupt (defaults to ->enable if NULL)
* @irq_shutdown:	shut down the interrupt (defaults to ->disable if NULL)
* @irq_enable:		enable the interrupt (defaults to chip->unmask if NULL)
* @irq_disable:	disable the interrupt
* @irq_ack:		start of a new interrupt
* @irq_mask:		mask an interrupt source
* @irq_mask_ack:	ack and mask an interrupt source
* @irq_unmask:		unmask an interrupt source
* @irq_eoi:		end of interrupt
* @irq_set_affinity:	set the CPU affinity on SMP machines
* @irq_retrigger:	resend an IRQ to the CPU
* @irq_set_type:	set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
* @irq_set_wake:	enable/disable power-management wake-on of an IRQ
* @irq_bus_lock:	function to lock access to slow bus (i2c) chips
* @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips
* @irq_cpu_online:	configure an interrupt source for a secondary CPU
* @irq_cpu_offline:	un-configure an interrupt source for a secondary CPU
* @irq_suspend:	function called from core code on suspend once per
*			chip, when one or more interrupts are installed
* @irq_resume:		function called from core code on resume once per chip,
*			when one or more interrupts are installed
* @irq_pm_shutdown:	function called from core code on shutdown once per chip
* @irq_calc_mask:	Optional function to set irq_data.mask for special cases
* @irq_print_chip:	optional to print special chip info in show_interrupts
* @irq_request_resources:	optional to request resources before calling
*				any other callback related to this irq
* @irq_release_resources:	optional to release resources acquired with
*				irq_request_resources
* @irq_compose_msi_msg:	optional to compose message content for MSI
* @irq_write_msi_msg:	optional to write message content for MSI
* @irq_get_irqchip_state:	return the internal state of an interrupt
* @irq_set_irqchip_state:	set the internal state of a interrupt
* @irq_set_vcpu_affinity:	optional to target a vCPU in a virtual machine
* @ipi_send_single:	send a single IPI to destination cpus
* @ipi_send_mask:	send an IPI to destination cpus in cpumask
* @flags:		chip specific flags
*/
struct irq_chip {
struct device	*parent_device;
const char	*name;
/* start-up / shut-down and enable / disable */
unsigned int	(*irq_startup)(struct irq_data *data);
void		(*irq_shutdown)(struct irq_data *data);
void		(*irq_enable)(struct irq_data *data);
void		(*irq_disable)(struct irq_data *data);

/* ack / mask / unmask / eoi: the callbacks the flow handlers use */
void		(*irq_ack)(struct irq_data *data);
void		(*irq_mask)(struct irq_data *data);
void		(*irq_mask_ack)(struct irq_data *data);
void		(*irq_unmask)(struct irq_data *data);
void		(*irq_eoi)(struct irq_data *data);

int		(*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);
int		(*irq_retrigger)(struct irq_data *data);
int		(*irq_set_type)(struct irq_data *data, unsigned int flow_type);
int		(*irq_set_wake)(struct irq_data *data, unsigned int on);

void		(*irq_bus_lock)(struct irq_data *data);
void		(*irq_bus_sync_unlock)(struct irq_data *data);

void		(*irq_cpu_online)(struct irq_data *data);
void		(*irq_cpu_offline)(struct irq_data *data);

void		(*irq_suspend)(struct irq_data *data);
void		(*irq_resume)(struct irq_data *data);
void		(*irq_pm_shutdown)(struct irq_data *data);

void		(*irq_calc_mask)(struct irq_data *data);

void		(*irq_print_chip)(struct irq_data *data, struct seq_file *p);
int		(*irq_request_resources)(struct irq_data *data);
void		(*irq_release_resources)(struct irq_data *data);

void		(*irq_compose_msi_msg)(struct irq_data *data, struct msi_msg *msg);
void		(*irq_write_msi_msg)(struct irq_data *data, struct msi_msg *msg);

int		(*irq_get_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool *state);
int		(*irq_set_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool state);

int		(*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info);

void		(*ipi_send_single)(struct irq_data *data, unsigned int cpu);
void		(*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest);

unsigned long	flags;
};

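最后,还有一个结构必须提一下,那就是pt_regs,从注释中可以看到,它主要用来在内核发生中断、系统调用时将寄存器的内容保存到内核栈上,以便后期恢复上下文。需要注意的是:下面贴出的定义(包含hae、trap_a0~trap_a2等与PALcode相关的字段)实际上来自Alpha架构的ptrace.h;ARM平台的pt_regs定义在/arch/arm/include/asm/ptrace.h中,形式为struct pt_regs { unsigned long uregs[18]; };,不过二者的作用是一样的。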

/*
* This struct defines the way the registers are stored on the
* kernel stack during a system call or other kernel entry
*
* NOTE! I want to minimize the overhead of system calls, so this
* struct has as little information as possible.  It does not have
*
*  - floating point regs: the kernel doesn't change those
*  - r9-15: saved by the C compiler
*
* This makes "fork()" and "exec()" a bit more complex, but should
* give us low system call latency.
*
* NOTE(review): the hae field and the PALcode comments below suggest this is
* the Alpha layout rather than the ARM one - confirm which architecture
* this excerpt is meant to show.
*/

struct pt_regs {
unsigned long r0;
unsigned long r1;
unsigned long r2;
unsigned long r3;
unsigned long r4;
unsigned long r5;
unsigned long r6;
unsigned long r7;
unsigned long r8;
unsigned long r19;
unsigned long r20;
unsigned long r21;
unsigned long r22;
unsigned long r23;
unsigned long r24;
unsigned long r25;
unsigned long r26;
unsigned long r27;
unsigned long r28;
unsigned long hae;
/* JRP - These are the values provided to a0-a2 by PALcode */
unsigned long trap_a0;
unsigned long trap_a1;
unsigned long trap_a2;
/* These are saved by PAL-code: */
unsigned long ps;
unsigned long pc;
unsigned long gp;
unsigned long r16;
unsigned long r17;
unsigned long r18;
};

/*
* This is the extended stack used by signal handlers and the context
* switcher: it's pushed after the normal "struct pt_regs".
*
* Holds r9-r15 (the registers the pt_regs comment lists as left out),
* plus r26 and 32 floating point slots.
*/
struct switch_stack {
unsigned long r9;
unsigned long r10;
unsigned long r11;
unsigned long r12;
unsigned long r13;
unsigned long r14;
unsigned long r15;
unsigned long r26;
unsigned long fp[32];	/* fp[31] is fpcr */
};


内核中断初始化过程

前文在论述中断事件传播过程中,还没有提及一些硬件相关的操作是如何、何时初始化的,比如 struct irq_chip相关的设置等,要讨论这些内容,还得回到linux 内核启动阶段。

在/init/main.c的start_kernel中,会调用early_irq_init,它位于/kernel/irq/irqdesc.c:

/*
* Boot-time set-up of the static irq_desc[] table: for every entry, allocate
* the per-cpu statistics and masks, initialise the lock and apply the default
* settings. Returns whatever arch_early_irq_init() returns.
*/
int __init early_irq_init(void)
{
int count, i, node = first_online_node;
struct irq_desc *desc;

init_irq_default_affinity();

printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS);

desc = irq_desc;
count = ARRAY_SIZE(irq_desc);

// Initialise every descriptor in the static irq_desc[] array
for (i = 0; i < count; i++) {
desc[i].kstat_irqs = alloc_percpu(unsigned int);
alloc_masks(&desc[i], GFP_KERNEL, node);
raw_spin_lock_init(&desc[i].lock);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
desc_set_defaults(i, &desc[i], node, NULL, NULL);
}
// NOTE(review): per the original annotation arch_early_irq_init() is only non-empty on x86 and PPC - confirm
return arch_early_irq_init();
}

start_kernel中紧接着early_irq_init被调用的是init_IRQ,它位于/arch/arm/kernel/irq.c:

/*
* ARM interrupt initialisation: runs either the generic irqchip_init() or
* the machine's own init_irq hook, then performs the optional L2 cache
* set-up and calls uniphier_cache_init().
*/
void __init init_IRQ(void)
{
int ret;

// With CONFIG_OF enabled and no machine_desc->init_irq hook, use irqchip_init();
// in every other case the machine's init_irq hook is called
if (IS_ENABLED(CONFIG_OF) && !machine_desc->init_irq)
irqchip_init();
else
machine_desc->init_irq();/* machine-specific interrupt initialisation hook */

// L2 cache (l2x0) initialisation, only when the machine supplies aux value/mask
if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_CACHE_L2X0) &&
(machine_desc->l2c_aux_mask || machine_desc->l2c_aux_val)) {
if (!outer_cache.write_sec)
outer_cache.write_sec = machine_desc->l2c_write_sec;
ret = l2x0_of_init(machine_desc->l2c_aux_val,
machine_desc->l2c_aux_mask);
if (ret && ret != -ENODEV)
pr_err("L2C: failed to init: %d\n", ret);
}

uniphier_cache_init();
}

上面提到的machine_desc又是什么呢?从字面意思可以看出,它是一个机器的描述符,对于ARM架构而言,它定义在/arch/arm/include/asm/mach/arch.h:

/*
* Per-machine descriptor: identification, memory/video parameters, L2 cache
* aux settings and a set of init hooks. init_IRQ() calls the init_irq hook.
*/
struct machine_desc {
unsigned int		nr;		/* architecture number	*/
const char		*name;		/* architecture name	*/
unsigned long		atag_offset;	/* tagged list (relative) */
const char *const 	*dt_compat;	/* array of device tree
* 'compatible' strings	*/

unsigned int		nr_irqs;	/* number of IRQs */

#ifdef CONFIG_ZONE_DMA
phys_addr_t		dma_zone_size;	/* size of DMA-able area */
#endif

unsigned int		video_start;	/* start of video RAM	*/
unsigned int		video_end;	/* end of video RAM	*/

unsigned char		reserve_lp0 :1;	/* never has lp0	*/
unsigned char		reserve_lp1 :1;	/* never has lp1	*/
unsigned char		reserve_lp2 :1;	/* never has lp2	*/
enum reboot_mode	reboot_mode;	/* default restart mode	*/
unsigned		l2c_aux_val;	/* L2 cache aux value	*/
unsigned		l2c_aux_mask;	/* L2 cache aux mask	*/
void			(*l2c_write_sec)(unsigned long, unsigned);
const struct smp_operations	*smp;	/* SMP operations	*/
bool			(*smp_init)(void);
void			(*fixup)(struct tag *, char **);
void			(*dt_fixup)(void);
long long		(*pv_fixup)(void);
void			(*reserve)(void);/* reserve mem blocks	*/
void			(*map_io)(void);/* IO mapping function	*/
void			(*init_early)(void);
void			(*init_irq)(void);	/* interrupt init hook, called from init_IRQ() */
void			(*init_time)(void);
void			(*init_machine)(void);
void			(*init_late)(void);
#ifdef CONFIG_MULTI_IRQ_HANDLER
void			(*handle_irq)(struct pt_regs *);
#endif
void			(*restart)(enum reboot_mode, const char *);
};

内核中通常不会直接定义machine_desc结构,相反,内核提供了一个宏MACHINE_START来完成此任务,该宏定义在/arch/arm/include/asm/mach/arch.h中:

/*
* Set of macros to define architecture features.  This is built into
* a table by the linker.
*
* MACHINE_START opens the initializer of a static const struct machine_desc
* placed in the ".arch.info.init" section and fills in .nr and .name; the
* caller adds further designated initializers and closes with MACHINE_END.
*/
#define MACHINE_START(_type,_name)			\
static const struct machine_desc __mach_desc_##_type	\
__used							\
__attribute__((__section__(".arch.info.init"))) = {	\
.nr		= MACH_TYPE_##_type,		\
.name		= _name,

/* Closes the initializer opened by MACHINE_START */
#define MACHINE_END				\
};

可以看到,MACHINE_START宏不只是简单的定义了machine_desc结构,它还使用gcc编译器属性__attribute__((__section__(".arch.info.init")))指定了machine_desc结构必须存放在".arch.info.init"段中,".arch.info.init"段又在哪里呢?这时就需要看一下/arch/arm/kernel/vmlinux.lds.S了,链接脚本有如下片段:

/*
 * Gathers every ".arch.info.init" input section and brackets the result with
 * the __arch_info_begin / __arch_info_end symbols.
 */
.init.arch.info : {
__arch_info_begin = .;
*(.arch.info.init)
__arch_info_end = .;
}

同时,为了方便获取".arch.info.init"段的内容和遍历它,内核还提供了以下工具函数:

/*
* Machine type table - also only accessible during boot
*
* for_each_machine_desc(p) walks p over every machine_desc between
* __arch_info_begin (inclusive) and __arch_info_end (exclusive).
*/
extern const struct machine_desc __arch_info_begin[], __arch_info_end[];
#define for_each_machine_desc(p)			\
for (p = __arch_info_begin; p < __arch_info_end; p++)

说了这么多,来看一个具体的例子,比如常用的mini2440开发板对应的machine_desc定义,它位于/arch/arm/mach-s3c24xx/mach-mini2440.c:

/* machine_desc for the MINI2440 board; its interrupt init hook is s3c2440_init_irq */
MACHINE_START(MINI2440, "MINI2440")
/* Maintainer: Michel Pollet <buserror@gmail.com> */
.atag_offset	= 0x100,
.map_io		= mini2440_map_io,
.init_machine	= mini2440_init,
.init_irq	= s3c2440_init_irq,
.init_time	= mini2440_init_time,
MACHINE_END

由于本文主要讲解中断,因此着重看一下s3c2440_init_irq:

/*
* init_irq hook used by the MINI2440 machine_desc: optionally initialises FIQ
* support, then registers the main interrupt controller followed by the
* external-interrupt and sub-interrupt controllers.
*/
void __init s3c2440_init_irq(void)
{
pr_info("S3C2440: IRQ Support\n");

#ifdef CONFIG_FIQ
init_FIQ(FIQ_START);
#endif

/* Main controller; nothing else is registered if this fails */
s3c_intc[0] = s3c24xx_init_intc(NULL, &init_s3c2440base[0], NULL,
0x4a000000);
if (IS_ERR(s3c_intc[0])) {
pr_err("irq: could not create main interrupt controller\n");
return;
}

/* presumably the third argument is the parent controller - TODO confirm */
s3c24xx_init_intc(NULL, &init_eint[0], s3c_intc[0], 0x560000a4);
s3c_intc[1] = s3c24xx_init_intc(NULL, &init_s3c2440subint[0],
s3c_intc[0], 0x4a000018);
}

可是,到目前为止,依然没有看到在哪里有设置chip的代码,其实,chip的设置并不是显式调用的,它使用了与内核模块初始化函数相同的调用机制,即把chip的设置放在一个init初始化函数中,由内核自动去调用,以S3C2440为例,代码在/arch/arm/mach-s3c24xx/bast-irq.c:

/*
* Registered with arch_initcall(). Does nothing unless machine_is_bast();
* otherwise clears the PC104 IRQ mask register, installs the chained demux
* handler on BAST_IRQ_ISA and sets chip + flow handler for the four PC104 IRQs.
* Always returns 0.
*/
static __init int bast_irq_init(void)
{
unsigned int i;

if (machine_is_bast()) {
printk(KERN_INFO "BAST PC104 IRQ routing, Copyright 2005 Simtec Electronics\n");

/* zap all the IRQs */

__raw_writeb(0x0, BAST_VA_PC104_IRQMASK);

irq_set_chained_handler(BAST_IRQ_ISA, bast_irq_pc104_demux);

/* register our IRQs */

for (i = 0; i < 4; i++) {
unsigned int irqno = bast_pc104_irqs[i];
// Install the irq_chip and the level-type flow handler for this IRQ
irq_set_chip_and_handler(irqno, &bast_pc104_chip,
handle_level_irq);
irq_clear_status_flags(irqno, IRQ_NOREQUEST);
}
}

return 0;
}

arch_initcall(bast_irq_init);

注意看arch_initcall(bast_irq_init)的实现,在/include/linux/init.h中:

/* arch_initcall(fn): register fn as an initcall with id 3 */
#define arch_initcall(fn)		__define_initcall(fn, 3)

/*
 * Defines a static initcall_t variable holding fn and places it in the
 * ".initcall<id>.init" section.
 */
#define __define_initcall(fn, id) \
static initcall_t __initcall_##fn##id __used \
__attribute__((__section__(".initcall" #id ".init"))) = fn; \
LTO_REFERENCE_INITCALL(__initcall_##fn##id)

可见,原理与内核模块module_init与module_exit原理一致,都是放在.initcall段中。

内核中断申请

终于来到了最简单的部分了,中断的申请和释放才是内核驱动开发中经常使用的api,比如在使用一个中断之前要先申请中断,它通过函数request_irq,其定义位于 /include/linux/interrupt.h:

/*
 * Convenience wrapper around request_threaded_irq() that passes a NULL
 * thread_fn; all other arguments are forwarded unchanged.
 */
static inline int __must_check
request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
const char *name, void *dev)
{
// Forward straight to request_threaded_irq() without a thread function
return request_threaded_irq(irq, handler, NULL, flags, name, dev);
}
// Function type every interrupt handler must have
typedef irqreturn_t (*irq_handler_t)(int, void *);

继续看request_threaded_irq实现:

/**
*	request_threaded_irq - allocate an interrupt line
*	@irq: Interrupt line to allocate
*	@handler: Function to be called when the IRQ occurs.
*		  Primary handler for threaded interrupts
*		  If NULL and thread_fn != NULL the default
*		  primary handler is installed
*	@thread_fn: Function called from the irq handler thread
*		    If NULL, no irq thread is created
*	@irqflags: Interrupt type flags
*	@devname: An ascii name for the claiming device
*	@dev_id: A cookie passed back to the handler function
*
*	This call allocates interrupt resources and enables the
*	interrupt line and IRQ handling. From the point this
*	call is made your handler function may be invoked. Since
*	your handler function must clear any interrupt the board
*	raises, you must take care both to initialise your hardware
*	and to set up the interrupt handler in the right order.
*
*	If you want to set up a threaded irq handler for your device
*	then you need to supply @handler and @thread_fn. @handler is
*	still called in hard interrupt context and has to check
*	whether the interrupt originates from the device. If yes it
*	needs to disable the interrupt on the device and return
*	IRQ_WAKE_THREAD which will wake up the handler thread and run
*	@thread_fn. This split handler design is necessary to support
*	shared interrupts.
*
*	Dev_id must be globally unique. Normally the address of the
*	device data structure is used as the cookie. Since the handler
*	receives this value it makes sense to use it.
*
*	If your interrupt is shared you must pass a non NULL dev_id
*	as this is required when freeing the interrupt.
*
*	Flags:
*
*	IRQF_SHARED		Interrupt is shared
*	IRQF_TRIGGER_*		Specify active edge(s) or level
*
*/
int request_threaded_irq(unsigned int irq, irq_handler_t handler,
irq_handler_t thread_fn, unsigned long irqflags,
const char *devname, void *dev_id)
{

struct irqaction *action;  // wraps the handler(s) supplied by the caller
struct irq_desc *desc;     // interrupt descriptor for irq
int retval;

if (irq == IRQ_NOTCONNECTED)
return -ENOTCONN;

/*
* Sanity-check: shared interrupts must pass in a real dev-ID,
* otherwise we'll have trouble later trying to figure out
* which interrupt is which (messes up the interrupt freeing
* logic etc).
*
* Also IRQF_COND_SUSPEND only makes sense for shared interrupts and
* it cannot be set along with IRQF_NO_SUSPEND.
*/
if (((irqflags & IRQF_SHARED) && !dev_id) ||
(!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) ||
((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND)))
return -EINVAL;

desc = irq_to_desc(irq);  // look up the descriptor for this interrupt number
if (!desc)
return -EINVAL;

if (!irq_settings_can_request(desc) ||
WARN_ON(irq_settings_is_per_cpu_devid(desc)))
return -EINVAL;

/* No primary handler: require a thread_fn and use the default primary handler */
if (!handler) {
if (!thread_fn)
return -EINVAL;
handler = irq_default_primary_handler;
}

action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); // allocate a zeroed irqaction
if (!action)
return -ENOMEM;

// Fill in the action from the arguments
action->handler = handler;
action->thread_fn = thread_fn;
action->flags = irqflags;
action->name = devname;
action->dev_id = dev_id;  // private cookie supplied by the caller

retval = irq_chip_pm_get(&desc->irq_data);
if (retval < 0) {
kfree(action);
return retval;
}

chip_bus_lock(desc);
// NOTE(review): per the original annotation __setup_irq() links action into the
// descriptor's action list; its body is not shown here
retval = __setup_irq(irq, desc, action);
chip_bus_sync_unlock(desc);

/* Set-up failed: undo the pm reference and free what was allocated */
if (retval) {
irq_chip_pm_put(&desc->irq_data);
kfree(action->secondary);
kfree(action);
}

#ifdef CONFIG_DEBUG_SHIRQ_FIXME
if (!retval && (irqflags & IRQF_SHARED)) {
/*
* It's a shared IRQ -- the driver ought to be prepared for it
* to happen immediately, so let's make sure....
* We disable the irq to make sure that a 'real' IRQ doesn't
* run in parallel with our fake.
*/
unsigned long flags;

disable_irq(irq);
local_irq_save(flags);

handler(irq, dev_id);

local_irq_restore(flags);
enable_irq(irq);
}
#endif
return retval;
}

中断使用完成之后,需要使用free_irq释放中断,代码位于/kernel/irq/manage.c:

/**
*	free_irq - free an interrupt allocated with request_irq
*	@irq: Interrupt line to free
*	@dev_id: Device identity to free
*
*	Remove an interrupt handler. The handler is removed and if the
*	interrupt line is no longer in use by any driver it is disabled.
*	On a shared IRQ the caller must ensure the interrupt is disabled
*	on the card it drives before calling this function. The function
*	does not return until any executing interrupts for this IRQ
*	have completed.
*
*	This function must not be called from interrupt context.
*/
void free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);// look up the descriptor for this interrupt number

if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc)))
return;

#ifdef CONFIG_SMP
if (WARN_ON(desc->affinity_notify))
desc->affinity_notify = NULL;
#endif

kfree(__free_irq(irq, dev_id)); // free the irqaction handed back by __free_irq() (NULL if none matched)
}


__free_irq的代码比较多,但是原理可以想象很简单,就是根据中断号找到对应的中断描述符,再找到对应的action,将其从链表中删除,并释放内存。

/*
* Internal function to unregister an irqaction - used to free
* regular and special interrupts that are part of the architecture.
*
* Unlinks the action whose dev_id matches from the descriptor's action list
* and returns it; the caller frees it. Returns NULL when there is no
* descriptor for irq or no action with that dev_id.
*/
static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action, **action_ptr;
unsigned long flags;

WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);

if (!desc)
return NULL;

chip_bus_lock(desc);
raw_spin_lock_irqsave(&desc->lock, flags);

/*
* There can be multiple actions per IRQ descriptor, find the right
* one based on the dev_id:
*/
action_ptr = &desc->action;
for (;;) {
action = *action_ptr;

/* End of list without a match: warn, drop both locks and give up */
if (!action) {
WARN(1, "Trying to free already-free IRQ %d\n", irq);
raw_spin_unlock_irqrestore(&desc->lock, flags);
chip_bus_sync_unlock(desc);
return NULL;
}

if (action->dev_id == dev_id)
break;
action_ptr = &action->next;
}

/* Found it - now remove it from the list of entries: */
*action_ptr = action->next;

irq_pm_remove_action(desc, action);

/* If this was the last handler, shut down the IRQ line: */
if (!desc->action) {
irq_settings_clr_disable_unlazy(desc);
irq_shutdown(desc);
irq_release_resources(desc);
}

#ifdef CONFIG_SMP
/* make sure affinity_hint is cleaned up */
if (WARN_ON_ONCE(desc->affinity_hint))
desc->affinity_hint = NULL;
#endif

raw_spin_unlock_irqrestore(&desc->lock, flags);
chip_bus_sync_unlock(desc);

unregister_handler_proc(irq, action);

/* Make sure it's not being used on another CPU: */
synchronize_irq(irq);

#ifdef CONFIG_DEBUG_SHIRQ
/*
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
* event to happen even now it's being freed, so let's make sure that
* is so by doing an extra call to the handler ....
*
* ( We do this after actually deregistering it, to make sure that a
*   'real' IRQ doesn't run in * parallel with our fake. )
*/
if (action->flags & IRQF_SHARED) {
local_irq_save(flags);
action->handler(irq, dev_id);
local_irq_restore(flags);
}
#endif

/* Stop the handler thread, and the secondary action's thread if there is one */
if (action->thread) {
kthread_stop(action->thread);
put_task_struct(action->thread);
if (action->secondary && action->secondary->thread) {
kthread_stop(action->secondary->thread);
put_task_struct(action->secondary->thread);
}
}

irq_chip_pm_put(&desc->irq_data);
module_put(desc->owner);
/* The secondary action is freed here; the primary one is returned to the caller */
kfree(action->secondary);
return action;
}


总结

前面说了这么多,估计很多人都看懵了,其实内容虽多,但是思路还是很清晰的,为了把上文提到的结构串起来,盗用网上的一张图说明一下,由于该图基于的内核版本远低于我参考的内核版本(4.0以后),因此下图展示的结构和我分析的源码有些差异,但是不妨碍理解。

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息