您的位置:首页 > 运维架构 > Linux

linux0.12之内核代码之『深入追踪fork函数』

2015-07-09 08:55 761 查看
在上一篇fork.c分析中简单分析了内核中fork的实现,这一篇就从用户层出发去分析fork函数的实现。

目前已经知道这是一个系统调用函数,看看能不能找到fork函数原型,很不幸花了十分又十分钟,还是没有找到。

但是在内核init中的main.c有调用fork函数,就以此为线索吧。

void main(void)  /* This really IS void, no error here. */
{    /* The startup routine assumes (well, ...) this */
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
ROOT_DEV = ORIG_ROOT_DEV;
SWAP_DEV = ORIG_SWAP_DEV;
sprintf(term, "TERM=con%dx%d", CON_COLS, CON_ROWS);
envp[1] = term;
envp_rc[1] = term;
drive_info = DRIVE_INFO;
memory_end = (1<<20) + (EXT_MEM_K<<10);
memory_end &= 0xfffff000;
if (memory_end > 16*1024*1024)
memory_end = 16*1024*1024;
if (memory_end > 12*1024*1024)
buffer_memory_end = 4*1024*1024;
else if (memory_end > 6*1024*1024)
buffer_memory_end = 2*1024*1024;
else
buffer_memory_end = 1*1024*1024;
main_memory_start = buffer_memory_end;
#ifdef RAMDISK
main_memory_start += rd_init(main_memory_start, RAMDISK*1024);
#endif
mem_init(main_memory_start,memory_end);
trap_init();
blk_dev_init();
chr_dev_init();
tty_init();
time_init();
sched_init();
buffer_init(buffer_memory_end);
hd_init();
floppy_init();
sti();
move_to_user_mode();
if (!fork()) {  /* we count on this going ok */
init();
}


最后调用了fork,但是怎么也找不到fork这个函数的定义,只在unistd.h中有声明。

那就搜索含有fork的关键字吧,

在main.c的最上面有

/*
* we need this inline - forking from kernel space will result
* in NO COPY ON WRITE (!!!), until an execve is executed. This
* is no problem, but for the stack. This is handled by not letting
* main() use the stack at all after fork(). Thus, no function
* calls - which means inline code for fork too, as otherwise we
* would use the stack upon exit from 'fork()'.
*
* Actually only pause and fork are needed inline, so that there
* won't be any messing with the stack from main(), but we define
* some others too.
*/
static inline _syscall0(int,fork)
static inline _syscall0(int,pause)
static inline _syscall1(int,setup,void *,BIOS)
static inline _syscall0(int,sync)


fork出现了:这里是一个宏调用,宏展开之后得到的是一个内联函数。_syscall0宏的定义如下:

/*
 * _syscall0(type,name): defines `type name(void)`, a stub for a system
 * call that takes no arguments.
 *
 * The stub executes `int $0x80` with __NR_<name> preloaded into eax (the
 * "0" input constraint shares the register of the "=a" output) and reads
 * the result back from eax into __res.
 *   __res >= 0 : returned to the caller, cast to `type`.
 *   __res <  0 : errno is set to -__res and the stub returns -1.
 */
#define _syscall0(type,name) \
type name(void) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
: "=a" (__res) \
: "0" (__NR_##name)); \
if (__res >= 0) \
return (type) __res; \
errno = -__res; \
return -1; \
}


将这个宏展开,就会得到fork的函数实现

来来展开看看

syscall0(int,fork)

int fork(void)
{
long __res;    \
__asm__ volatile ( "int  $0x80" \
:"=a" (__res) \
:"0"  (__NR_fork) \
); \
if(__res >= 0)
return (int) __res;
errno = -__res;
return -1;
}


int 0x80就是触发系统调用的软中断,__NR_fork作为系统调用号通过eax寄存器传给内核:

#define __NR_fork   2


看看 int 0x80 会中断执行什么,

首先我们知道,linux0.12中会有一个中断描述符表(IDT),就类似于中断向量表,

那常规理解,肯定要去注册才能使用,

所以在sched.c调度代码中有一个初始化调度函数

/*
 * sched_init(): one-time scheduler setup, called from main() before
 * interrupts are enabled.
 *
 * In order, it: sanity-checks sizeof(struct sigaction); passes
 * init_task's tss and ldt to set_tss_desc/set_ldt_desc for the GDT slots
 * FIRST_TSS_ENTRY and FIRST_LDT_ENTRY; empties task[1..NR_TASKS-1] and
 * zeroes two GDT descriptors per such slot; clears a flag bit in EFLAGS;
 * calls ltr(0)/lldt(0); writes the timer setup bytes to ports 0x43/0x40;
 * and installs the gates for vector 0x20 (timer_interrupt) and vector
 * 0x80 (system_call).
 */
void sched_init(void)
{
int i;
struct desc_struct * p;

/* Other code evidently relies on this exact size; refuse to continue otherwise. */
if (sizeof(struct sigaction) != 16)
panic("Struct sigaction MUST be 16 bytes");
set_tss_desc(gdt+FIRST_TSS_ENTRY,&(init_task.task.tss));
set_ldt_desc(gdt+FIRST_LDT_ENTRY,&(init_task.task.ldt));
/* Start two descriptors past FIRST_TSS_ENTRY, i.e. after the pair set above. */
p = gdt+2+FIRST_TSS_ENTRY;
/* Slot 0 is left alone; every other task slot and its two descriptors are cleared. */
for(i=1;i<NR_TASKS;i++) {
task[i] = NULL;
p->a=p->b=0;
p++;
p->a=p->b=0;
p++;
}
/* Clear NT, so that we won't have troubles with that later on */
__asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
/* NOTE(review): ltr/lldt are defined elsewhere; presumably they load TR/LDTR for task 0. */
ltr(0);
lldt(0);
outb_p(0x36,0x43);  /* binary, mode 3, LSB/MSB, ch 0 */
outb_p(LATCH & 0xff , 0x40);   /* LSB */
outb(LATCH >> 8 , 0x40);   /* MSB */
set_intr_gate(0x20,&timer_interrupt);
/* Clear bit 0 of the byte at port 0x21 (presumably unmasks the timer IRQ; confirm). */
outb(inb_p(0x21)&~0x01,0x21);
/* Vector 0x80 is the one the _syscallN stubs raise with `int $0x80`. */
set_system_gate(0x80,&system_call);
}


其中set_system_gate(0x80,&system_call);,很吊呀,

很像呀,

看看这个函数是什么鬼,在system.h中,是一个宏

/*
 * set_system_gate(n,addr): install addr as the handler in IDT entry n,
 * using gate type 15 with DPL 3 (the DPL that lets user-mode code raise
 * the vector with `int n`).
 */
#define set_system_gate(n,addr) \
_set_gate(&idt[n],15,3,addr)
将里面的宏再展开
/*
 * _set_gate(gate_addr,type,dpl,addr): write an 8-byte gate descriptor at
 * gate_addr.
 *
 * On entry edx holds addr and eax holds 0x00080000. The asm then
 *   - copies dx into ax, so eax = 0x0008 in its high word and the low
 *     16 bits of addr in its low word;
 *   - loads dx with 0x8000+(dpl<<13)+(type<<8), so edx = the high 16
 *     bits of addr in its high word and that flags word in its low word;
 *   - stores eax to bytes 0-3 and edx to bytes 4-7 of the descriptor.
 */
#define _set_gate(gate_addr,type,dpl,addr) \
__asm__ ("movw %%dx,%%ax\n\t" \
"movw %0,%%dx\n\t" \
"movl %%eax,%1\n\t" \
"movl %%edx,%2" \
: \
: "i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
"o" (*((char *) (gate_addr))), \
"o" (*(4+(char *) (gate_addr))), \
"d" ((char *) (addr)),"a" (0x00080000))


大致就是将addr地址注册到idt第0x80表项中。

所以到现在 明白了一点

执行int 0x80系统就会去调用 system_call函数。(其实这就是常规的中断调用机制)

接下来回到 fork函数

在调用int 0x80时 ,还传递了一个参数放到eax中,

看看system_call是什么鬼,在sys_call.s中

_system_call:
push %ds
push %es
push %fs
pushl %eax                       # save the orig_eax
pushl %edx
pushl %ecx                       # push %ebx,%ecx,%edx as parameters
pushl %ebx                       # to the system call
movl $0x10,%edx             # set up ds,es to kernel space
mov %dx,%ds
mov %dx,%es
movl $0x17,%edx             # fs points to local data space
mov %dx,%fs
cmpl _NR_syscalls,%eax
jae bad_sys_call
call _sys_call_table(,%eax,4)
pushl %eax
其中这几句是关键
cmpl _NR_syscalls,%eax
jae bad_sys_call
call _sys_call_table(,%eax,4)
在include/linux/sys.h中的
/* So we don't have to do any more manual updating.... */
int NR_syscalls = sizeof(sys_call_table)/sizeof(fn_ptr);

/*
 * System-call dispatch table: entry i is the handler for system call
 * number i (sys_fork sits at index 2, matching __NR_fork == 2).
 * system_call indexes it with eax via `call _sys_call_table(,%eax,4)`,
 * so the order of the entries must never change.
 */
fn_ptr sys_call_table[] = { sys_setup, sys_exit, sys_fork, sys_read,
sys_write, sys_open, sys_close, sys_waitpid, sys_creat, sys_link,
sys_unlink, sys_execve, sys_chdir, sys_time, sys_mknod, sys_chmod,
sys_chown, sys_break, sys_stat, sys_lseek, sys_getpid, sys_mount,
sys_umount, sys_setuid, sys_getuid, sys_stime, sys_ptrace, sys_alarm,
sys_fstat, sys_pause, sys_utime, sys_stty, sys_gtty, sys_access,
sys_nice, sys_ftime, sys_sync, sys_kill, sys_rename, sys_mkdir,
sys_rmdir, sys_dup, sys_pipe, sys_times, sys_prof, sys_brk, sys_setgid,
sys_getgid, sys_signal, sys_geteuid, sys_getegid, sys_acct, sys_phys,
sys_lock, sys_ioctl, sys_fcntl, sys_mpx, sys_setpgid, sys_ulimit,
sys_uname, sys_umask, sys_chroot, sys_ustat, sys_dup2, sys_getppid,
sys_getpgrp, sys_setsid, sys_sigaction, sys_sgetmask, sys_ssetmask,
sys_setreuid,sys_setregid, sys_sigsuspend, sys_sigpending, sys_sethostname,
sys_setrlimit, sys_getrlimit, sys_getrusage, sys_gettimeofday,
sys_settimeofday, sys_getgroups, sys_setgroups, sys_select, sys_symlink,
sys_lstat, sys_readlink, sys_uselib };
在sched.h中
typedef int (*fn_ptr)();


从这三个代码片段可以知道,sys_call_table是一个函数指针数组,NR_syscalls保存的就是这个数组的元素个数(即系统调用的总数)。

在system_call中怎么call一个数组呢,其实 call _sys_call_table(,%eax,4)

的含义是:从地址 sys_call_table + %eax*4 处取出函数指针,再调用它所指向的函数;乘以4是因为每个数组元素占4个字节(32位的函数地址)。

所以

cmpl _NR_syscalls,%eax
jae bad_sys_call
call _sys_call_table(,%eax,4)


的功能就是,判断调用参数是否超出系统调用的最大调用号,如果正常,调用传入调用号对应的系统函数。

整体流程为:fork() → int 0x80 → system_call → sys_call_table[__NR_fork] → sys_fork。



下面的任务就是 研究sys_fork这个系统函数了.还在sys_call.s中

# sys_fork: handler reached through sys_call_table for system call 2.
# Calls find_empty_process first; if its result in eax is negative the
# handler returns at once with that value still in eax. Otherwise gs,
# esi, edi, ebp and the find_empty_process result are pushed and
# copy_process is called.
.align 2
_sys_fork:
call _find_empty_process
testl %eax,%eax
# sign flag set => negative result: skip the copy and return it as is
js 1f
push %gs
pushl %esi
pushl %edi
pushl %ebp
pushl %eax
call _copy_process
# drop the five 4-byte words pushed above
addl $20,%esp
1:  ret


这里调用了两个函数 这在我上一篇已经分析了

/article/7829062.html
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: