您的位置:首页 > 运维架构 > Linux

linux启动流程分析---汇编部分

2011-09-26 00:00 513 查看
当进入linux内核后,arch/arm/kernel/head.S是内核最先执行的一个文件,包括从内核入口ENTRY(stext)到start_kernel之间的初始化代码,下面以我所使用的平台s3c2410为例,说明一下它的汇编代码:

1: __INIT

2: .type stext, %function

3: ENTRY(stext)
/* 程序状态,禁止FIQ、IRQ,设定SVC模式 */

4: msr cpsr_c, #PSR_F_BIT | PSR_I_BIT | MODE_SVC @ ensure svc mode

@ and irqs disabled
/* 判断CPU类型,查找运行的CPU ID值与Linux编译支持的ID值是否支持 */

5: bl __lookup_processor_type @ r5=procinfo r9=cpuid

6: movs r10, r5 @ invalid processor (r5=0)?
/* 判断如果r10的值为0,则表示函数执行错误,跳转到出错处理,*/

7: beq __error_p @ yes, error 'p'
/* 判断体系类型,查看R1寄存器的Architecture Type值是否支持 */

8: bl __lookup_machine_type @ r5=machinfo

9: movs r8, r5 @ invalid machine (r5=0)?
/* 判断如果r8的值为0,则表示函数执行错误,跳转到出错处理,*/

10: beq __error_a @ yes, error 'a'
/* 创建核心页表 */

11: bl __create_page_tables
12: ldr r13, __switch_data @ address to jump to after

@ mmu has been enabled

13: adr lr, __enable_mmu @ return (PIC) address

14: add pc, r10, #PROCINFO_INITFUNC

第4行,准备进入SVC工作模式,同时关闭中断(I_BIT)和快速中断(F_BIT)

第5行,查看处理器类型,主要是为了得到处理器的ID以及页表的flags。

第8行,查看一些体系结构的信息。

第11行,建立页表。

第14行,跳转到处理器的初始化函数,其函数地址是从__lookup_processor_type中得到的,需要注意的是第13行,当处理器初始化完成后,会直接跳转到__enable_mmu去执行,

这是由于初始化函数最后的语句是mov pc, lr。

函数__lookup_processor_type介绍:
内核中使用了一个结构struct proc_info_list,用来记录处理器相关的信息,该结构定义在

kernel/include/asm-arm/procinfo.h头文件中。
/*

* Note! struct processor is always defined if we're

* using MULTI_CPU, otherwise this entry is unused,

* but still exists.

*

* NOTE! The following structure is defined by assembly

* language, NOT C code. For more information, check:

* arch/arm/mm/proc-*.S and arch/arm/kernel/head.S

*/

struct proc_info_list {

unsigned int cpu_val;

unsigned int cpu_mask;

unsigned long __cpu_mmu_flags; /* used by head.S */

unsigned long __cpu_flush; /* used by head.S */

const char *arch_name;

const char *elf_name;

unsigned int elf_hwcap;

const char *cpu_name;

struct processor *proc;

struct cpu_tlb_fns *tlb;

struct cpu_user_fns *user;

struct cpu_cache_fns *cache;

};
在arch/arm/mm/proc-arm920.S文件中定义了所有和arm920有关的proc_info_list,我们使用的arm920定义如下:
.section ".proc.info.init", #alloc, #execinstr

.type __arm920_proc_info,#object

__arm920_proc_info:

.long 0x41009200

.long 0xff00fff0

.

.
由于.section指示符,上面定义的__arm920_proc_info信息在编译的时候被放到了.proc.info.init段中,链接时再由linux的链接脚本文件arch/arm/kernel/vmlinux.lds统一收集,参考如下:

SECTIONS

{

. = TEXTADDR;

.init : { /* Init code and data */

_stext = .;

_sinittext = .;

*(.init.text)

_einittext = .;

__proc_info_begin = .;

*(.proc.info.init)

__proc_info_end = .;

.

.
这里的符号__proc_info_begin指向.proc.info.init的起始地址,而符号__proc_info_end指向.proc.info.init的结束地址。后面就会引用这两个符号,来指向.proc.info.init这个段。

下面来看看函数的源代码,为了分析方便将函数按行进行编号,其中17-18行就是前面提到的对.proc.info的引用,
第2行将19行的地址放到寄存器r3中,adr是小范围的地址读取伪指令。
第3行将r3所指向的数据区的数据读出到r5,r6,r9,执行结果是

R5=__proc_info_begin,r6=__proc_info_end,r9=第19行的地址

用仿真器查看:

r5=0xc0018664

r6=0xc0018694

r9=0xc0008324(为何是0xc0008324而不是0x30008324?因为第19行的".long ."保存的是链接时确定的虚拟地址;而第2行adr是相对PC计算的,得到的r3才是运行时的物理地址0x30008324,第4行正是用这两者之差求出虚、实地址的偏移)
第4行算出虚、实地址的偏移,
第5-6行的结果应该是r5指向__proc_info_begin的实地址,r6指向__proc_info_end的实地址。
第7行读取cpu的id,这是一个协处理器指令,将processor ID存储在r9中。
第8行将r5指向的__arm920_proc_info开始的数据读出放到寄存器r3,r4,结果r3=0x41009200 (cpu_val),r4=0xff00fff0 (cpu_mask)。
第9-11行将读出的id和结构中的id进行比较,如果id相同则返回,返回时r9存储

processor ID,如果id不匹配,则将指针r5增加PROC_INFO_SZ (proc_info_list结构的长度,在这等于48),如果r5小于r6指定的地址,也就是

__proc_info_end,则继续循环比较下一个proc_info_list中的id,如第12-14行的代码,如果查找到__proc_info_end,仍未找到一个匹配的id,则将r5清零并返回,如15-16行,也就是说如果函数执行成功则r5指向匹配的proc_info_list结构地址,如果函数返回错误则r5为0。

/*

* Read processor ID register (CP#15, CR0), and look up in the linker-built

* supported processor list. Note that we can't use the absolute addresses

* for the __proc_info lists since we aren't running with the MMU on

* (and therefore, we are not in the correct address space). We have to

* calculate the offset.

*

* Returns:

* r3, r4, r6 corrupted

* r5 = proc_info pointer in physical address space

* r9 = cpuid

*/
.type __lookup_processor_type, %function

1 __lookup_processor_type:

2 adr r3, 3f

3 ldmda r3, {r5, r6, r9}

4 sub r3, r3, r9 @ get offset between virt&phys

5 add r5, r5, r3 @ convert virt addresses to

6 add r6, r6, r3 @ physical address space

7 mrc p15, 0, r9, c0, c0 @ get processor id

8 1: ldmia r5, {r3, r4} @ value, mask

9 and r4, r4, r9 @ mask wanted bits

10 teq r3, r4

11 beq 2f

12 add r5, r5, #PROC_INFO_SZ @ sizeof(proc_info_list)

13 cmp r5, r6

14 blo 1b

15 mov r5, #0 @ unknown processor

16 2: mov pc, lr
/*

* Look in include/asm-arm/procinfo.h and arch/arm/kernel/arch.[ch] for

* more information about the __proc_info and __arch_info structures.

*/

17 .long __proc_info_begin

18 .long __proc_info_end

19 3: .long .

20 .long __arch_info_begin

21 .long __arch_info_end

函数__lookup_machine_type介绍:

每个机器(一般指的是某一个电路板)都有自己的特殊结构,如物理内存地址,物理I/O地址,显存起始地址等等,

这个结构为struct machine_desc,定义在asm-arm/mach/arch.h中:

struct machine_desc {

/*

* Note! The first five elements are used

* by assembler code in head-armv.S

*/

unsigned int nr; /* architecture number */

unsigned int phys_ram; /* start of physical ram */

unsigned int phys_io; /* start of physical io */

unsigned int io_pg_offst; /* byte offset for io

* page table entry */

const char *name; /* architecture name*/

unsigned long boot_params; /* tagged list */

unsigned int video_start; /* start of video RAM */

unsigned int video_end; /* end of video RAM */

unsigned int reserve_lp0 :1; /* never has lp0 */

unsigned int reserve_lp1 :1; /* never has lp1 */

unsigned int reserve_lp2 :1; /* never has lp2 */

unsigned int soft_reboot :1; /* soft reboot */

void (*fixup)(struct machine_desc *,

struct tag *, char **,

struct meminfo *);

void (*map_io)(void); /* IO mapping function */

void (*init_irq)(void);

struct sys_timer *timer; /* system tick timer */

void (*init_machine)(void);

};
这个结构一般都定义在(以arm平台为例)kernel\arch\arm\mach-xxx\xxx.c中,是用宏来定义的,以s3c2410的开发板为例:

定义在kernel\arch\arm\mach-s3c2410\mach-smdk2410.c文件中,如下所示:

MACHINE_START(SMDK2410, "SMDK2410") /* @TODO: request a new identifier and switch to SMDK2410 */

/* Maintainer: Jonas Dietsche */

.phys_ram = S3C2410_SDRAM_PA,

.phys_io = S3C2410_PA_UART,

.io_pg_offst = (((u32)S3C24XX_VA_UART) >> 18) & 0xfffc,

.boot_params = S3C2410_SDRAM_PA + 0x100,

.map_io = smdk2410_map_io,

.init_irq = smdk2410_init_irq,

.timer = &s3c24xx_timer,

MACHINE_END
这些宏也定义在kernel/include/asm-arm/mach/arch.h中,以MACHINE_START为例:

#define MACHINE_START(_type,_name) \

const struct machine_desc __mach_desc_##_type \

__attribute__((__section__(".arch.info.init"))) = { \

.nr = MACH_TYPE_##_type, \

.name = _name,

#define MACHINE_END \

};
展开之后的结构是:

__mach_desc_SMDK2410= {

.nr = MACH_TYPE_SMDK2410,

.name = "SMDK2410",
中间的1行__attribute__((__section__(".arch.info.init"))) = {说明将这个结构放到指定的段.arch.info.init中,这和前面的

.proc.info.init是一个意思,__attribute__((__section__的含义参考GNU手册。后面的宏都是类似的含义,这里就不再一一介绍。

下面开始说明源码:
第2行实现r3指向3b的地址,3b如__lookup_processor_type介绍的第19行,
第3行将r3所指向的数据区的数据读出到r4,r5,r6,执行结果是
R5= __arch_info_begin,r6= __arch_info_end,r4=第19行的地址
用仿真器查看:
R5= 0xc0018694
R6= 0xc00186cc
R4= 0xc0008324 (为何是0xc0008324而不是0x30008324?因为3b处的".long ."保存的是链接时确定的虚拟地址;而第2行adr是相对PC计算的,得到的r3才是运行时的物理地址0x30008324,第4行正是用这两者之差求出虚、实地址的偏移)
第4行算出虚、实地址的偏移,
第5-6行的结果应该是r5指向__arch_info_begin的实地址,r6指向__arch_info_end的实地址。
第7行读取__mach_desc_SMDK2410结构中的nr参数到r3中,
第8行比较r3和r1中的机器编号是否相同,

r3中的nr值MACH_TYPE_SMDK2410定义在kernel\include\asm-arm\mach-types.h中:

#define MACH_TYPE_SMDK2410 193
r1中的值是由bootloader传递过来的,这在<<linux启动流程分析(1)---bootloader启动内核过程>>中有说明,如果机器编号相同,跳到14行返回。如果不同则将地址指针增加,再跳到7行继续查找,见10--12行的代码,如果检索完所有的machine_desc仍然没有找到则将r5清零并返回。
/*

* Lookup machine architecture in the linker-build list of architectures.

* Note that we can't use the absolute addresses for the __arch_info

* lists since we aren't running with the MMU on (and therefore, we are

* not in the correct address space). We have to calculate the offset.

*

* r1 = machine architecture number

* Returns:

* r3, r4, r6 corrupted

* r5 = mach_info pointer in physical address space

*/
.type __lookup_machine_type, %function

1: __lookup_machine_type:

2: adr r3, 3b

3: ldmia r3, {r4, r5, r6}

4: sub r3, r3, r4 @ get offset between virt&phys

5: add r5, r5, r3 @ convert virt addresses to

6: add r6, r6, r3 @ physical address space

7: 1: ldr r3, [r5, #MACHINFO_TYPE] @ get machine type

8: teq r3, r1 @ matches loader number?

9: beq 2f @ found

10: add r5, r5, #SIZEOF_MACHINE_DESC @ next machine_desc

11: cmp r5, r6

12: blo 1b

13: mov r5, #0 @ unknown machine

14 2: mov pc, lr

函数__create_page_tables介绍:
假设内核起始物理地址是0x30008000,虚拟地址是0xC0008000,下面的代码是建立内核起始处4MB空间的映射,

采用了一级映射方式,即段式(section)映射方式,每段映射范围为1MB空间。于是需要建立4个表项,实现:

虚拟地址0xC0000000~0xC03FFFFF,映射到物理地址0x30000000~0x303FFFFF。
.macro pgtbl, reg, rambase

adr \reg, stext

sub \reg, \reg, #0x4000

.endm

.macro krnladr, rd, pgtable, rambase

bic \rd, \pgtable, #0x000ff000

.endm

/*

* Setup the initial page tables. We only setup the barest

* amount which are required to get the kernel running, which

* generally means mapping in the kernel code.

*

* r8 = machinfo

* r9 = cpuid

* r10 = procinfo

*

* Returns:

* r0, r3, r5, r6, r7 corrupted

* r4 = physical page table address

*/
.type __create_page_tables, %function

__create_page_tables:

ldr r5, [r8, #MACHINFO_PHYSRAM] @ physram r5=0x30000000

pgtbl r4, r5 @ page table address r4=0x30004000

/*

* Clear the 16K level 1 swapper page table

*/

mov r0, r4

mov r3, #0

add r6, r0, #0x4000

1: str r3, [r0], #4

str r3, [r0], #4

str r3, [r0], #4

str r3, [r0], #4

teq r0, r6

bne 1b

ldr r7, [r10, #PROCINFO_MMUFLAGS] @ mmuflags //r7=0x00000c1e

/*

* r7用于设置第一级表描述符之用:

* AP :11(读/写权限)

* 域 :0000

* C : 1(高速缓存)

* B : 1 (缓冲)

* bit[1 0] :10(标识此为节描述符)

*/

/*

* Create identity mapping for first MB of kernel to

* cater for the MMU enable. This identity mapping

* will be removed by paging_init(). We use our current program

* counter to determine corresponding section base address.

*

* 为以后MMU的开启准备好内存头1M空间的转换表,

* 采用平板地址映射模式(表索引==节基址)

*/
mov r6, pc, lsr #20 @ start of kernel section //r6=0x00000300

orr r3, r7, r6, lsl #20 @ flags + kernel base //r3=0x30000c1e

str r3, [r4, r6, lsl #2] @ identity mapping //[0x30004c00]=0x30000c1e

/* 结果:

* 虚拟地址 物理地址

* 0x30000000 0x30000000。

*

* Now setup the pagetables for our kernel direct

* mapped region. We round TEXTADDR down to the

* nearest megabyte boundary. It is assumed that

* the kernel fits within 4 contigous 1MB sections.

*

* 为内核占用的头4M地址准备好转换表(在这采用的就不是平板地址映射模式了)

*/

add r0, r4, #(TEXTADDR & 0xff000000) >> 18 //r0=0x30007000

str r3, [r0, #(TEXTADDR & 0x00f00000) >> 18]! @ KERNEL + 0MB

add r3, r3, #1 << 20 //r3=0x30100c1e

str r3, [r0, #4]! @ KERNEL + 1MB

add r3, r3, #1 << 20 //r3=0x30200c1e

str r3, [r0, #4]! @ KERNEL + 2MB

add r3, r3, #1 << 20 //0x30300c1e

str r3, [r0, #4] @ KERNEL + 3MB

/*

* 结果:

* 虚拟地址 物理地址

0xc0000000 0x30000000

0xc0100000 0x30100000

0xc0200000 0x30200000

0xc0300000 0x30300000

*/

/*

* Then map first 1MB of ram in case it contains our boot params.

*

* 映射sdram的头1M以防boot params需要(不是已经映射过了吗?——在本平台上内核就位于RAM的第一个1MB内,这里写入的表项与上面KERNEL + 0MB的表项相同;这段代码是为内核不在RAM起始1MB的情况(例如XIP内核)准备的)

*/

add r0, r4, #VIRT_OFFSET >> 18 //r0=0x30007000

orr r6, r5, r7 //r6=0x30000c1e

str r6, [r0] //[0x30007000]=0x30000c1e
#ifdef CONFIG_XIP_KERNEL
/*

* Map some ram to cover our .data and .bss areas.

* Mapping 3MB should be plenty.

*/

sub r3, r4, r5

mov r3, r3, lsr #20

add r0, r0, r3, lsl #2

add r6, r6, r3, lsl #20

str r6, [r0], #4

add r6, r6, #(1 << 20)

str r6, [r0], #4

add r6, r6, #(1 << 20)

str r6, [r0]

#endif
#ifdef CONFIG_DEBUG_LL

bic r7, r7, #0x0c @ turn off cacheable

@ and bufferable bits

/*

* Map in IO space for serial debugging.

* This allows debug messages to be output

* via a serial console before paging_init.

*/
ldr r3, [r8, #MACHINFO_PGOFFIO]

add r0, r4, r3

rsb r3, r3, #0x4000 @ PTRS_PER_PGD*sizeof(long)

cmp r3, #0x0800 @ limit to 512MB

movhi r3, #0x0800

add r6, r0, r3

ldr r3, [r8, #MACHINFO_PHYSIO]

orr r3, r3, r7

1: str r3, [r0], #4

add r3, r3, #1 << 20

teq r0, r6

bne 1b

#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)

/*

* If we're using the NetWinder, we need to map in

* the 16550-type serial port for the debug messages

*/

teq r1, #MACH_TYPE_NETWINDER

teqne r1, #MACH_TYPE_CATS

bne 1f

add r0, r4, #0xff000000 >> 18

orr r3, r7, #0x7c000000

str r3, [r0]
1:

#endif
#ifdef CONFIG_ARCH_RPC

/*

* Map in screen at 0x02000000 & SCREEN2_BASE

* Similar reasons here - for debug. This is

* only for Acorn RiscPC architectures.

*/
add r0, r4, #0x02000000 >> 18

orr r3, r7, #0x02000000

str r3, [r0]

add r0, r4, #0xd8000000 >> 18

str r3, [r0]

#endif
#endif

mov pc, lr

函数__mmap_switched介绍:

/*

* The following fragment of code is executed with the MMU on, and uses

* absolute addresses; this is not position independent.

*

* r0 = cp#15 control register

* r1 = machine ID

* r9 = processor ID

*/
1: .type __mmap_switched, %function

2: __mmap_switched:

3: adr r3, __switch_data + 4

4: ldmia r3!, {r4, r5, r6, r7}

5: cmp r4, r5 @ Copy data segment if needed

6: 1: cmpne r5, r6

7: ldrne fp, [r4], #4

8: strne fp, [r5], #4

9: bne 1b

10: mov fp, #0 @ Clear BSS (and zero fp)

11:1: cmp r6, r7

12: strcc fp, [r6],#4

13: bcc 1b

14: ldmia r3, {r4, r5, r6, sp}

15: str r9, [r4] @ Save processor ID

16: str r1, [r5] @ Save machine type

17: bic r4, r0, #CR_A @ Clear 'A' bit

18: stmia r6, {r0, r4} @ Save control register values

19: b start_kernel

20: .type __switch_data, %object

21:__switch_data:

22: .long __mmap_switched

23: .long __data_loc @ r4

24: .long __data_start @ r5

25: .long __bss_start @ r6

26: .long _end @ r7

27: .long processor_id @ r4

28: .long __machine_arch_type @ r5

29: .long cr_alignment @ r6

30: .long init_thread_union + THREAD_START_SP @ sp
程序的第4行执行完成之后的结果是r4=__data_loc,r5=__data_start,r6=__bss_start,r7=_end,第10-13行将__bss_start到_end之间的区域清零,这两个符号定义在vmlinux.lds文件中,如下:

.bss : {

__bss_start = .; /* BSS */

*(.bss)

*(COMMON)

_end = . ;

}

第15-16行分别将处理器类型和机器类型存储到变量processor_id和__machine_arch_type中,这些变量以后会在start_kernel->setup_arch中使用,来得到当前处理器的struct proc_info_list结构和当前系统的machine_desc结构的数据。

第17-18行将processor control register保存到cr_alignment中,第19行跳转到init/main.c中的start_kernel进入内核启动的第二阶段。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  启动 linux 休闲