您的位置:首页 > 运维架构 > Linux

基于linux2.6.38.8内核启动过程完全解析[一]

2012-02-18 13:36 519 查看
***************************************************************************************************************************

作者:EasyWave 时间:2012.02.18

类别:linux驱动开发 声明:转载,请保留链接

***************************************************************************************************************************

一: Linux kernel内存布局

在ARM平台中zImage.bin是一个压缩镜像,它用于将被压缩的kernel解压缩到KERNEL_RAM_PADDR开始的一段内存中,接着跳进真正的kernel去执行。该kernel的执行起点是stext函数,定义于arch/arm/kernel/head.S。在分析ENTRY(stext)前,先介绍此时内存的布局如下图所示:



图一:内存布局[此处借用下网络上已有的图片,自己不想弄图片了]
在我的YL-E2410的平台上,SDRAM的开始内存地址是0x30000000,大小为64M,即0x04000000。 Linux2.6.38.8 ARM kernel将SDRAM的开始地址定义为PHYS_OFFSET。经bootloader加载kernel并由自解压部分代码运行后,最终kernel被放置到KERNEL_RAM_PADDR(=PHYS_OFFSET + TEXT_OFFSET,即0x30008000)地址上的一段内存,经此放置后,kernel代码以后均不会被移动。在arch\arm\mach-s3c2410\Makefile.boot文件,其内容如下:

zreladdr-y := 0x30008000

params_phys-y := 0x30000100

这也验证了为什么内核会被放置在0x30008000的地方了,这个地址必须由Makefile.boot指定。而params_phys-y则是linux Kernel的taglist等参数的起始地址。在进入kernel代码前,即自解压缩阶段,ARM未开启MMU功能。因此启动代码一个重要功能是设置好相应的页表,并开启MMU功能。为了支持MMU功能,kernel镜像中的所有符号,包括代码段和数据段的符号,在链接时生成的都是开启MMU后、其所在物理内存地址映射到的虚拟内存地址。以Kernel第一个符号stext为例,在编译链接时,它生成的虚拟地址是0xc0008000,而放置它的物理地址为0x30008000。实际上这个变换可以用一个简单的公式表示:va = pa - PHYS_OFFSET + PAGE_OFFSET。Arm linux最终的kernel空间的页表,就是按照这个关系来建立。之所以提及linux的内存映射,原因是在进入kernel代码时,里面所有符号的地址值都是0xCxxxxxxx形式的虚拟地址,而此时ARM未开启MMU功能,故在执行stext函数第一条指令时,它的PC值就是stext所在的内存地址(即物理地址,0x30008000)。

二:stext函数详解

stext函数定义在arch/arm/kernel/head.S,它的功能是获取处理器类型和机器类型信息,并创建临时的页表,然后开启MMU功能,并跳进第一个C语言函数start_kernel。stext函数的前置条件是:MMU和D-cache均处于关闭状态; r0 = 0, r1 = machine nr, r2 = atags pointer.

/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*  Kernel startup code for all 32-bit CPUs
*/
#include <linux/linkage.h>
#include <linux/init.h>

#include <asm/assembler.h>
#include <asm/domain.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <asm/memory.h>
#include <asm/thread_info.h>
#include <asm/system.h>

#ifdef CONFIG_DEBUG_LL
#include <mach/debug-macro.S>
#endif

#if (PHYS_OFFSET & 0x001fffff)
#error "PHYS_OFFSET must be at an even 2MiB boundary!"
#endif

/*
 * Virtual and physical address of the start of the kernel image:
 * the same TEXT_OFFSET is applied to PAGE_OFFSET and to PHYS_OFFSET.
 */
#define KERNEL_RAM_VADDR	(PAGE_OFFSET + TEXT_OFFSET)
#define KERNEL_RAM_PADDR	(PHYS_OFFSET + TEXT_OFFSET)

/*
* swapper_pg_dir is the virtual address of the initial page table.
* We place the page tables 16K below KERNEL_RAM_VADDR.  Therefore, we must
* make sure that KERNEL_RAM_VADDR is correctly set.  Currently, we expect
* the least significant 16 bits to be 0x8000, but we could probably
* relax this restriction to KERNEL_RAM_VADDR >= PAGE_OFFSET + 0x4000.
*/
#if (KERNEL_RAM_VADDR & 0xffff) != 0x8000
#error KERNEL_RAM_VADDR must start at 0xXXXX8000
#endif

/* Export swapper_pg_dir as an absolute symbol 0x4000 (16K) below the kernel. */
.globl	swapper_pg_dir
.equ	swapper_pg_dir, KERNEL_RAM_VADDR - 0x4000

/*
 * pgtbl rd: load into \rd the physical counterpart of swapper_pg_dir,
 * i.e. KERNEL_RAM_PADDR - 0x4000.  The value comes from the literal pool
 * (ldr =), so the macro needs a pool within range of its use.
 */
.macro	pgtbl, rd
ldr	\rd, =(KERNEL_RAM_PADDR - 0x4000)
.endm

/*
 * Virtual address range treated as "the kernel" when building the initial
 * mapping: with CONFIG_XIP_KERNEL it is taken from CONFIG_XIP_PHYS_ADDR up
 * to _edata_loc, otherwise from KERNEL_RAM_VADDR up to _end.
 */
#ifdef CONFIG_XIP_KERNEL
#define KERNEL_START	XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
#define KERNEL_END	_edata_loc
#else
#define KERNEL_START	KERNEL_RAM_VADDR
#define KERNEL_END	_end
#endif

/*
 * Kernel startup entry point.
 * ---------------------------
 *
 * This is normally called from the decompressor code.  The requirements
 * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,
 * r1 = machine nr, r2 = atags pointer.
 *
 * This code is mostly position independent, so if you link the kernel at
 * 0xc0008000, you call this at __pa(0xc0008000).
 *
 * See linux/arch/arm/tools/mach-types for the complete list of machine
 * numbers for r1.
 *
 * We're trying to keep crap to a minimum; DO NOT add any machine specific
 * crap here - that's what the boot loader (or in extreme, well justified
 * circumstances, zImage) is for.
 *
 * Register use established below and relied on by the callees:
 *   r8  = machinfo (from __lookup_machine_type)
 *   r9  = cpuid    (read from CP15 c0)
 *   r10 = procinfo (from __lookup_processor_type)
 *   r13 = address of __mmap_switched, used once the MMU is on
 */
__HEAD
ENTRY(stext)
/* Put the CPU in SVC mode with the I and F bits set (interrupts masked). */
setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
@ and irqs disabled
mrc	p15, 0, r9, c0, c0		@ get processor id
bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
/* r10 = proc_info record matching this CPU, or 0 if none was found */
movs	r10, r5				@ invalid processor (r5=0)?
THUMB( it	eq )		@ force fixup-able long branch encoding
beq	__error_p			@ yes, error 'p'
bl	__lookup_machine_type		@ r5=machinfo
/* r8 = arch_info (machine descriptor) record for this board, or 0 */
movs	r8, r5				@ invalid machine (r5=0)?
THUMB( it	eq )		@ force fixup-able long branch encoding
beq	__error_a			@ yes, error 'a'
/*
 * __vet_atags (not part of this excerpt) deals with the ATAG list through
 * which the boot loader tells the kernel about physical memory.
 */
/*
 * r1 = machine no, r2 = atags,
 * r8 = machinfo, r9 = cpuid, r10 = procinfo
 */
bl	__vet_atags
#ifdef CONFIG_SMP_ON_UP
bl	__fixup_smp
#endif
/* Build the temporary (boot-time) page tables; r4 = their physical address. */
bl	__create_page_tables

/*
 * The following calls CPU specific code in a position independent
 * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
 * xxx_proc_info structure selected by __lookup_processor_type
 * above.  On return, the CPU will be ready for the MMU to be
 * turned on, and r0 will hold the CPU control register value.
 *
 * Control flow: pc is loaded with r10 + PROCINFO_INITFUNC, i.e. execution
 * continues at that offset inside the proc_info record, with lr pointing
 * at label 1 below so that the CPU init code returns into
 * "b __enable_mmu".  r13 is preloaded with the (virtual) address of
 * __mmap_switched for use after the MMU has been enabled.
 *
 * Annotation from the article (routines not visible in this excerpt):
 * on the S3C2410 board the init code reached is __arm920_setup;
 * __enable_mmu ends with "mov pc, r13" to enter __mmap_switched, whose
 * last instruction branches to the first C function, start_kernel.
 */
ldr	r13, =__mmap_switched		@ address to jump to after
@ mmu has been enabled
adr	lr, BSYM(1f)			@ return (PIC) address
ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
THUMB(	mov	pc, r12				)
1:	b	__enable_mmu
ENDPROC(stext)
/* Emit the literal pool here (holds the "ldr r13, =" constant above). */
.ltorg

三:__lookup_processor_type函数

__lookup_processor_type函数是一个非常讲究技巧的函数,Kernel代码将所有CPU信息的定义都放到.proc.info.init段中,因此可以认为.proc.info.init段就是一个数组,每个元素都定义了一个或一种CPU的信息。目前__lookup_processor_type使用该元素的前两个字段cpuid和mask来匹配当前CPUID,如果满足(CPUID & mask) == cpuid,则找到当前cpu的定义并返回。

/*
* Read processor ID register (CP#15, CR0), and look up in the linker-built
* supported processor list.  Note that we can't use the absolute addresses
* for the __proc_info lists since we aren't running with the MMU on
* (and therefore, we are not in the correct address space).  We have to
* calculate the offset.
*
* __lookup_processor_type_data (defined outside this excerpt) is read as
* three words: r4 = the link-time address of the data itself, r5 = start
* of the proc_info list, r6 = end of the list.  The difference between the
* run-time address obtained with adr and r4 is the virt->phys offset that
* is then applied to r5 and r6.
*
*	r9 = cpuid
* Returns:
*	r3, r4, r6 corrupted
*	r5 = proc_info pointer in physical address space
*	     (0 if no entry matches)
*	r9 = cpuid (preserved)
*/
__CPUINIT
__lookup_processor_type:
/*
 * adr is PC-relative: the result is the current PC plus the distance to
 * __lookup_processor_type_data.  The PC holds a physical address here
 * (MMU off), so r3 ends up with the physical address of that symbol.
 */
adr	r3, __lookup_processor_type_data
ldmia	r3, {r4 - r6}
sub	r3, r3, r4			@ get offset between virt&phys
add	r5, r5, r3			@ convert virt addresses to
add	r6, r6, r3			@ physical address space
1:	ldmia	r5, {r3, r4}			@ value, mask
/*
 * AND the running CPU id with the entry mask and compare against the
 * entry value.  On a match this is the proc_info record for the CPU:
 * return with r5 pointing at it.
 */
and	r4, r4, r9			@ mask wanted bits
teq	r3, r4
beq	2f
/* advance r5 to the next proc_info entry */
add	r5, r5, #PROC_INFO_SZ		@ sizeof(proc_info_list)
/* keep going while r5 is still below the end of the list */
cmp	r5, r6
blo	1b
/* nothing matched: return NULL */
mov	r5, #0				@ unknown processor
2:	mov	pc, lr
ENDPROC(__lookup_processor_type)

四:__lookup_machine_type 函数

__lookup_machine_type 和__lookup_processor_type像对孪生兄弟,它们的行为都是很类似的:__lookup_machine_type根据r1寄存器的机器编号到.arch.info.init段的数组中依次查找机器编号与r1相同的记录。它使用了与它孪生兄弟同样的手法进行虚拟地址到物理地址的转换计算。

/*
* Lookup machine architecture in the linker-build list of architectures.
* Note that we can't use the absolute addresses for the __arch_info
* lists since we aren't running with the MMU on (and therefore, we are
* not in the correct address space).  We have to calculate the offset.
*
* __lookup_machine_type_data (defined outside this excerpt) is read as
* three words: r4 = the link-time address of the data itself, r5 = start
* of the machine descriptor list, r6 = end of the list.
*
*  r1 = machine architecture number
* Returns:
*  r3, r4, r6 corrupted
*  r5 = mach_info pointer in physical address space
*       (0 if no descriptor carries the number in r1)
*/
__lookup_machine_type:
/* r3 = run-time address of the data block (PC-relative, so physical here) */
adr	r3, __lookup_machine_type_data
ldmia	r3, {r4, r5, r6}
sub	r3, r3, r4			@ get offset between virt&phys
add	r5, r5, r3			@ convert virt addresses to
add	r6, r6, r3			@ physical address space
/* linear scan: compare the type field of each descriptor with r1 */
1:	ldr	r3, [r5, #MACHINFO_TYPE]	@ get machine type
teq	r3, r1				@ matches loader number?
beq	2f				@ found
add	r5, r5, #SIZEOF_MACHINE_DESC	@ next machine_desc
/* loop while r5 is still below the end of the list (unsigned compare) */
cmp	r5, r6
blo	1b
mov	r5, #0				@ unknown machine
2:	mov	pc, lr
ENDPROC(__lookup_machine_type)

具体的请看我的另一篇文章:MACHINE_START and MACHINE_END Macro define

五:为kernel建立临时页表

前面提及到,kernel里面的所有符号在链接时,都使用了虚拟地址值。在完成基本的初始化后,kernel代码将跳到第一个C语言函数start_kernel来执行,在那个时候,这些虚拟地址必须能够对应到它们实际存放的物理内存位置,否则运行将会出错。为此,CPU必须开启MMU,但在开启MMU前,必须为虚拟地址到物理地址的映射建立相应的页表。在开启MMU后,kernel并不马上将PC值指向start_kernel,而是要做一些C语言运行期的设置,如堆栈,重定位等工作后才跳到start_kernel去执行。在此过程中,PC值还是物理地址,因此还需要为这段内存空间建立va = pa的内存映射关系。当然,本函数建立的所有页表都会在将来paging_init销毁再重建,这是临时过渡性的映射关系和页表。

在介绍__create_page_tables前,先认识一个macro pgtbl,它将KERNEL_RAM_PADDR - 0x4000的值赋给rd寄存器,从下面的使用中可以看出,该值是页表在物理内存中的基址,也即页表放在kernel开始地址之下16K的地方。

/*
 * pgtbl rd: load \rd with KERNEL_RAM_PADDR - 0x4000, the physical address
 * 16K (0x4000 bytes) below the start of the kernel image.
 */
.macro pgtbl, rd

ldr \rd, =(KERNEL_RAM_PADDR - 0x4000)

.endm


/*
* Setup the initial page tables.  We only setup the barest
* amount which are required to get the kernel running, which
* generally means mapping in the kernel code.
*
* Index arithmetic used throughout: one table entry is 4 bytes and is
* advanced by 1 << 20 (1MB) of address per entry, so the entry for an
* address A sits at byte offset (A >> 20) << 2 from the table base.
* For constants this is written as A >> 18 (on 1MB-aligned values);
* for registers as "lsr #20" followed by "lsl #2".
*
* r8  = machinfo
* r9  = cpuid
* r10 = procinfo
*
* Returns:
*  r0, r3, r5-r7 corrupted
*  r4 = physical page table address
*/
__create_page_tables:
pgtbl	r4				@ page table address

/*
* Clear the 16K level 1 swapper page table
* (r0 walks from r4 to r4 + 0x4000, four word stores per pass)
*/
mov	r0, r4
mov	r3, #0
add	r6, r0, #0x4000
1:	str	r3, [r0], #4
str	r3, [r0], #4
str	r3, [r0], #4
str	r3, [r0], #4
teq	r0, r6
bne	1b

/* r7 = memory mapping flags taken from this CPU's proc_info record */
ldr	r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags

/*
* Create identity mapping to cater for __enable_mmu.
* This identity mapping will be removed by paging_init().
*
* __enable_mmu_loc (outside this excerpt) is read as three words:
* r3 = its own link-time address, r5 / r6 = start / end of the
* __enable_mmu code.  After conversion to physical addresses both are
* reduced to 1MB section numbers (lsr #20).
*/
adr	r0, __enable_mmu_loc
ldmia	r0, {r3, r5, r6}
sub	r0, r0, r3			@ virt->phys offset
add	r5, r5, r0			@ phys __enable_mmu
add	r6, r6, r0			@ phys __enable_mmu_end
mov	r5, r5, lsr #20
mov	r6, r6, lsr #20

/*
 * For every section number from r5 to r6 inclusive, store
 * (section base | flags) at table index = section number, which makes
 * the virtual address equal to the physical address.
 */
1:	orr	r3, r7, r5, lsl #20		@ flags + kernel base
str	r3, [r4, r5, lsl #2]		@ identity mapping
teq	r5, r6
addne	r5, r5, #1			@ next section
bne	1b

/*
* Now setup the pagetables for our kernel direct
* mapped region.
*
* The physical section base is derived from the current pc (the code is
* running from physical addresses at this point); the table slot is
* derived from the virtual KERNEL_START.  The first entry is stored with
* writeback, then the loop fills one entry per 1MB up to the slot that
* covers KERNEL_END - 1.
*/
mov	r3, pc
mov	r3, r3, lsr #20
orr	r3, r7, r3, lsl #20
add	r0, r4,  #(KERNEL_START & 0xff000000) >> 18
str	r3, [r0, #(KERNEL_START & 0x00f00000) >> 18]!
ldr	r6, =(KERNEL_END - 1)
add	r0, r0, #4
add	r6, r4, r6, lsr #18
/*
 * The add below does not update the flags, so strls / bls still act on
 * the result of the cmp: store and loop while r0 <= r6 (unsigned).
 */
1:	cmp	r0, r6
add	r3, r3, #1 << 20
strls	r3, [r0], #4
bls	1b

#ifdef CONFIG_XIP_KERNEL
/*
* Map some ram to cover our .data and .bss areas.
*
* Same pattern as above, but the physical base comes from
* KERNEL_RAM_PADDR, the slot from KERNEL_RAM_VADDR, and the range runs
* to _end - 1.  The second orr is assembled only when the 0x00f00000
* field of the address is non-zero.
*/
orr	r3, r7, #(KERNEL_RAM_PADDR & 0xff000000)
.if	(KERNEL_RAM_PADDR & 0x00f00000)
orr	r3, r3, #(KERNEL_RAM_PADDR & 0x00f00000)
.endif
add	r0, r4,  #(KERNEL_RAM_VADDR & 0xff000000) >> 18
str	r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> 18]!
ldr	r6, =(_end - 1)
add	r0, r0, #4
add	r6, r4, r6, lsr #18
1:	cmp	r0, r6
add	r3, r3, #1 << 20
strls	r3, [r0], #4
bls	1b
#endif

/*
* Then map first 1MB of ram in case it contains our boot params.
* (table slot for PAGE_OFFSET -> section at PHYS_OFFSET, flags from r7)
*/
add	r0, r4, #PAGE_OFFSET >> 18
orr	r6, r7, #(PHYS_OFFSET & 0xff000000)
.if	(PHYS_OFFSET & 0x00f00000)
orr	r6, r6, #(PHYS_OFFSET & 0x00f00000)
.endif
str	r6, [r0]

#ifdef CONFIG_DEBUG_LL
#ifndef CONFIG_DEBUG_ICEDCC
/*
* Map in IO space for serial debugging.
* This allows debug messages to be output
* via a serial console before paging_init.
*
* NOTE(review): addruart is not defined in this excerpt (presumably it
* comes from mach/debug-macro.S).  Judging by the uses below, r3 is
* treated as the virtual UART address (it selects the table slot) and
* r7 as the physical one (it forms the section base) - confirm.
*/
addruart r7, r3

/* r3 = byte offset of the table entry for that virtual address */
mov	r3, r3, lsr #20
mov	r3, r3, lsl #2

/*
 * r0 = first entry to write.  r3 becomes the number of bytes left up to
 * the end of the 16K table, capped at 0x0800 bytes of entries; r6 marks
 * the end of the range.
 */
add	r0, r4, r3
rsb	r3, r3, #0x4000			@ PTRS_PER_PGD*sizeof(long)
cmp	r3, #0x0800			@ limit to 512MB
movhi	r3, #0x0800
add	r6, r0, r3
/* from here on r7 holds the IO flags instead of the memory flags */
mov	r3, r7, lsr #20
ldr	r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
orr	r3, r7, r3, lsl #20
1:	str	r3, [r0], #4
add	r3, r3, #1 << 20
teq	r0, r6
bne	1b

#else /* CONFIG_DEBUG_ICEDCC */
/* we don't need any serial debugging mappings for ICEDCC */
ldr	r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
#endif /* !CONFIG_DEBUG_ICEDCC */

#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)
/*
* If we're using the NetWinder or CATS, we also need to map
* in the 16550-type serial port for the debug messages
* (slot for virtual 0xff000000 -> section at 0x7c000000, IO flags)
*/
add	r0, r4, #0xff000000 >> 18
orr	r3, r7, #0x7c000000
str	r3, [r0]
#endif
#ifdef CONFIG_ARCH_RPC
/*
* Map in screen at 0x02000000 & SCREEN2_BASE
* Similar reasons here - for debug.  This is
* only for Acorn RiscPC architectures.
* (the same section entry is stored in the slots for 0x02000000 and
* for 0xd8000000)
*/
add	r0, r4, #0x02000000 >> 18
orr	r3, r7, #0x02000000
str	r3, [r0]
add	r0, r4, #0xd8000000 >> 18
str	r3, [r0]
#endif
#endif
mov	pc, lr
ENDPROC(__create_page_tables)


里面涉及的代码主要就是建立虚拟地址与物理地址的转换,尤其是右移20位和18位两个地方与页表目录项的地址关系比较复杂。执行完该函数后,虚拟内存和物理内存的映射关系如下图所示:



未完待续。。。。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: