您的位置:首页 > 运维架构 > Linux

Linux 缺页中断发展历史

2015-01-20 11:13 169 查看
慢慢来吧~~



Linux V0.11

缺页中断响应代码:

.globl _page_fault

/*
 * Page-fault entry stub (Linux 0.11).
 * Saves the caller's registers, then calls a C handler with two stack
 * arguments: the error code (first argument) and the faulting address
 * read from %cr2 (second argument). Afterwards it drops the arguments,
 * restores the registers and returns with iret.
 */
_page_fault:
xchgl %eax,(%esp)         /* eax <- value on top of the stack (used below as the error code); the old eax is saved in its place */
pushl %ecx
pushl %edx
push %ds
push %es
push %fs
movl $0x10,%edx           /* selector 0x10 is loaded into ds/es/fs -- presumably the kernel data segment; confirm against the GDT setup */
mov %dx,%ds
mov %dx,%es
mov %dx,%fs
movl %cr2,%edx            /* cr2 holds the faulting linear address */
pushl %edx                /* second C argument: address */
pushl %eax                /* first C argument: error_code */
testl $1,%eax         /* bit 0 of the error code: 0 = page not present, 1 = protection violation on a present page */
jne 1f
call _do_no_page      /* bit 0 clear: the page is missing */
jmp 2f
1:	call _do_wp_page      /* bit 0 set: write to a present, write-protected page */
2:	addl $8,%esp              /* discard the two 4-byte arguments */
pop %fs
pop %es
pop %ds
popl %edx
popl %ecx
popl %eax                 /* restores the eax saved by the xchgl above */
iret


核心接口:

1. 缺页处理接口 do_no_page

/*
 * Handle a fault on a page that is not present (Linux 0.11).
 *
 * error_code: fault error code pushed by the _page_fault stub (not used here).
 * address:    faulting linear address.
 *
 * Outside the executable image the fault is satisfied with
 * get_empty_page(); inside it, the page is either shared via share_page()
 * or read from the executable's blocks and mapped with put_page().
 * oom() is called when no page can be obtained or mapped.
 */
void do_no_page(unsigned long error_code,unsigned long address)
{
int nr[4];
unsigned long tmp;
unsigned long page;
int block,i;

/* round down to the start of the page */
address &= 0xfffff000;
/* offset of the page from the start of the task's code */
tmp = address - current->start_code;
/* no executable, or beyond end_data: nothing to load from disk */
if (!current->executable || tmp >= current->end_data) {
get_empty_page(address);
return;
}
/* a non-zero result from share_page() ends the fault here -- presumably the page was shared with another task; confirm in share_page() */
if (share_page(tmp))
return;
if (!(page = get_free_page()))
oom();
/* remember that 1 block is used for header */
block = 1 + tmp/BLOCK_SIZE;
/* look up four consecutive block numbers and read them into the new page */
for (i=0 ; i<4 ; block++,i++)
nr[i] = bmap(current->executable,block);
bread_page(page,current->executable->i_dev,nr);
/* number of bytes in this page that lie past end_data */
i = tmp + 4096 - current->end_data;
/* zero those bytes, walking backwards from the end of the page */
tmp = page + 4096;
while (i-- > 0) {
tmp--;
*(char *)tmp = 0;
}
/* a non-zero result from put_page() means the page was mapped */
if (put_page(page,address))
return;
free_page(page);
oom();
}


流程图



2.写时复制的复制函数: do_wp_page

/*
 * Remove write protection from the page mapped by *table_entry (Linux 0.11).
 *
 * If the page is at or above LOW_MEM and its mem_map count is exactly 1,
 * the entry is simply made writable. Otherwise a new page is allocated,
 * the entry is redirected to it and the old contents are copied over.
 */
void un_wp_page(unsigned long * table_entry)
{
unsigned long old_page,new_page;

/* physical page address: the entry without its low 12 flag bits */
old_page = 0xfffff000 & *table_entry;
if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) {
/* only one user: set bit 1 (writable) in place */
*table_entry |= 2;
invalidate();
return;
}
if (!(new_page=get_free_page()))
oom();
/* drop this mapping's count on the old page (pages below LOW_MEM are not counted here) */
if (old_page >= LOW_MEM)
mem_map[MAP_NR(old_page)]--;
/* point the entry at the new page with the low three flag bits set */
*table_entry = new_page | 7;
invalidate();
copy_page(old_page,new_page);
}

/*
 * Write-protection fault on a present page (a user wrote to a shared page).
 * Looks up the page-table entry that maps 'address' and passes it to
 * un_wp_page(), which either makes the page writable in place or gives
 * the writer a private copy and drops one count on the old page.
 *
 * The intended "segment error on writes to code space" check is
 * compiled out below.
 */
void do_wp_page(unsigned long error_code,unsigned long address)
{
	unsigned long *dir_entry;
	unsigned long table_base;
	unsigned long entry_offset;

#if 0
/* we cannot do this yet: the estdio library writes to code space */
/* stupid, stupid. I really want the libc.a from GNU */
	if (CODE_SPACE(address))
		do_exit(SIGSEGV);
#endif
	/* page-directory entry for this address (the directory is addressed from 0) */
	dir_entry = (unsigned long *) ((address>>20) & 0xffc);
	/* base address of the page table that entry points to */
	table_base = 0xfffff000 & *dir_entry;
	/* byte offset of the entry inside that page table */
	entry_offset = (address>>10) & 0xffc;

	un_wp_page((unsigned long *) (entry_offset + table_base));
}


3. 写时复制函数

/*
 * If the page containing 'address' is present but not writable, hand its
 * page-table entry to un_wp_page(). Does nothing when the page-directory
 * entry is not present.
 */
void write_verify(unsigned long address)
{
	unsigned long dir_entry;
	unsigned long *pte;

	dir_entry = *((unsigned long *) ((address>>20) & 0xffc));
	if (!(dir_entry & 1))		/* directory entry not present */
		return;

	/* page-table base plus the offset of the entry for this address */
	pte = (unsigned long *) ((dir_entry & 0xfffff000) + ((address>>10) & 0xffc));

	if ((*pte & 3) == 1)		/* non-writeable, present */
		un_wp_page(pte);
}


Linux V0.12

1. 对do_wp_page 添加了参数有效性检测:

/*
 * Write-protection fault handler (Linux 0.12).
 * Logs a warning for addresses below TASK_SIZE, kills the task with
 * SIGSEGV when the address is more than TASK_SIZE past its start_code,
 * and otherwise passes the page-table entry for 'address' to un_wp_page().
 */
void do_wp_page(unsigned long error_code,unsigned long address)
{
	unsigned long *dir_entry;
	unsigned long *pte;

	/* warning only: execution continues after the message */
	if (address < TASK_SIZE)
		printk("\n\rBAD! KERNEL MEMORY WP-ERR!\n\r");

	if (address - current->start_code > TASK_SIZE) {
		printk("Bad things happen: page error in do_wp_page\n\r");
		do_exit(SIGSEGV);
	}

	/* page-directory entry, then the page-table entry it leads to */
	dir_entry = (unsigned long *) ((address>>20) & 0xffc);
	pte = (unsigned long *) (((address>>10) & 0xffc) + (0xfffff000 & *dir_entry));

	un_wp_page(pte);
}


2. do_no_page 加入了地址有效性和对虚拟内存的支持:

源码:

/*
 * Handle a fault on a page that is not present (Linux 0.12).
 *
 * error_code: fault error code (not used here).
 * address:    faulting linear address.
 *
 * Compared with the earlier version shown in this article, this one
 * validates the address, calls swap_in() for a non-present but non-zero
 * page-table entry, and can load pages from current->library as well as
 * current->executable.
 */
void do_no_page(unsigned long error_code,unsigned long address)
{
int nr[4];
unsigned long tmp;
unsigned long page;
int block,i;
struct m_inode * inode;

/* warning only: execution continues after the message */
if (address < TASK_SIZE)
printk("\n\rBAD!! KERNEL PAGE MISSING\n\r");
if (address - current->start_code > TASK_SIZE) {
printk("Bad things happen: nonexistent page error in do_no_page\n\r");
do_exit(SIGSEGV);
}
/* page-directory entry for this address */
page = *(unsigned long *) ((address >> 20) & 0xffc);
if (page & 1) {
/* directory entry present: locate the page-table entry */
page &= 0xfffff000;
page += (address >> 10) & 0xffc;
tmp = *(unsigned long *) page;
/* entry is non-zero but not present: let swap_in() handle it -- presumably the page was swapped out; confirm in swap_in() */
if (tmp && !(1 & tmp)) {
swap_in((unsigned long *) page);
return;
}
}
/* round down to the start of the page; tmp = offset from start_code */
address &= 0xfffff000;
tmp = address - current->start_code;
/* choose the backing inode and the first block to read */
if (tmp >= LIBRARY_OFFSET ) {
inode = current->library;
block = 1 + (tmp-LIBRARY_OFFSET) / BLOCK_SIZE;
} else if (tmp < current->end_data) {
inode = current->executable;
block = 1 + tmp / BLOCK_SIZE;
} else {
inode = NULL;
block = 0;
}
/* no backing file for this page */
if (!inode) {
get_empty_page(address);
return;
}
if (share_page(inode,tmp))
return;
if (!(page = get_free_page()))
oom();
/* remember that 1 block is used for header */
for (i=0 ; i<4 ; block++,i++)
nr[i] = bmap(inode,block);
bread_page(page,inode->i_dev,nr);
/* bytes of this page past end_data; a value above 4095 disables the zeroing below */
i = tmp + 4096 - current->end_data;
if (i>4095)
i = 0;
/* zero the tail of the page, walking backwards from its end */
tmp = page + 4096;
while (i-- > 0) {
tmp--;
*(char *)tmp = 0;
}
/* a non-zero result from put_page() means the page was mapped */
if (put_page(page,address))
return;
free_page(page);
oom();
}


流程图:



Linux V0.95

1. 首先, 缺页中断的响应代码从 page.s 移除 (page.s 不存在了), 加入到了 kernel/asm.s, 且直接调用 do_page_fault

/*
 * Page-fault entry (Linux 0.95): push the address of the C handler
 * _do_page_fault and jump to the error_code label, which is not shown
 * in this excerpt.
 */
_page_fault:
pushl $_do_page_fault
jmp error_code


2. 对于引起缺页中断的原因的判断放在 do_page_fault 内

/*
 * Page-fault dispatcher: reads the faulting address from %cr2 and
 * forwards to do_wp_page() when bit 0 of error_code is set, or to
 * do_no_page() when it is clear.
 */
void do_page_fault (unsigned long *esp, unsigned long error_code)
{
	unsigned long fault_addr;

	/* fetch the faulting address */
	__asm__ ("movl %%cr2,%0":"=r" (fault_addr));

	if (error_code & 1)
		do_wp_page(error_code, fault_addr);
	else
		do_no_page(error_code, fault_addr, current);
}


3. 写时复制调用的接口 un_wp_page 对于内存耗尽的情况做了循环和更多的保护, 不再直接操作 mem_map 数组的引用计数, 改用 free_page 来释放一个引用.

/*
 * Remove write protection from the page mapped by *table_entry (Linux 0.95).
 *
 * The entry is re-read and re-checked from the top after a new page has
 * been allocated; any page allocated here is freed again on every path
 * that ends up not using it.
 */
void un_wp_page(unsigned long * table_entry)
{
unsigned long old_page;
unsigned long new_page = 0;
unsigned long dirty;

repeat:
old_page = *table_entry;
/* remember the dirty flag so it can be carried over to the new entry */
dirty = old_page & PAGE_DIRTY;
/* entry not present: nothing to un-protect */
if (!(old_page & 1)) {
if (new_page)
free_page(new_page);
return;
}
old_page &= 0xfffff000;
if (old_page >= HIGH_MEMORY) {
if (new_page)
free_page(new_page);
printk("bad page address\n\r");
do_exit(SIGSEGV);
}
/* only one user of the page: just set bit 1 (writable) in place */
if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) {
*table_entry |= 2;
invalidate();
if (new_page)
free_page(new_page);
return;
}
if (!new_page) {
if (!(new_page=get_free_page()))
oom();                           // no free page was available; per the article this path guards against oom() returning
goto repeat;                             // run all the checks again now that the allocation has been attempted
}
copy_page(old_page,new_page);
/* map the copy, keeping the dirty flag and setting the low three flag bits */
*table_entry = new_page | dirty | 7 ;
free_page(old_page);                             // release this mapping's reference to the old page
invalidate();
}


4. do_no_page 加入更多检测合法性的代码.

Linux V0.95a

加入更多的合法性检测

/* Sanity check: if the page-table entry is already non-zero, report it, clear the entry and call invalidate(). */
if (*page_table) {
printk("put_dirty_page: page already exists\n");
*page_table = 0;
invalidate();
}


Linux V0.95c

加入一点对进程的操作

例如在do_wp_page中

/* per-task accounting: increment the current task's min_flt counter */
++current->min_flt;


Linux V0.96b

将获取空内存页的接口进行了一层包装, 处理了内存不足的问题. 从而像接口 do_no_page 之类的可以不再考虑内存不足.

/*
 * Return the entry stored at *p, installing a newly allocated page there
 * (with flag bits 7) when no present entry exists. A non-zero entry that
 * is not present is reported and cleared first. oom() is called when no
 * free page can be obtained.
 */
static unsigned long get_empty(unsigned long * p)
{
	unsigned long fresh = 0;

	for (;;) {
		if (*p & 1) {
			/* a present entry exists: give back the page we may hold */
			free_page(fresh);
			return *p;
		}
		if (*p) {
			printk("get_empty: bad page entry \n");
			*p = 0;
		}
		if (fresh) {
			*p = fresh | 7;
			return *p;
		}
		/* allocate, then loop so the entry is checked again before use */
		fresh = get_free_page();
		if (!fresh)
			oom();
	}
}


接口do_no_page

/*
 * Handle a fault on a page that is not present (Linux 0.96b).
 *
 * error_code: fault error code (not used here).
 * address:    faulting linear address.
 * tsk:        task whose address space the fault is resolved in.
 * user_esp:   user stack pointer, used only by the SIGSEGV check below.
 *
 * Updates the task's rss, min_flt and maj_flt counters as it goes.
 */
void do_no_page(unsigned long error_code, unsigned long address,
struct task_struct *tsk, unsigned long user_esp)
{
static unsigned int last_checked = 0;
int nr[4];
unsigned long tmp;
unsigned long page;
unsigned int block,i;
struct inode * inode;

/* Thrashing ? Make it interruptible, but don't penalize otherwise */
for (i = 0; i < CHECK_LAST_NR; i++)
if ((address & 0xfffff000) == last_pages[i]) {
current->counter = 0;
schedule();
}
/* record this page in the ring of recently faulted pages */
last_checked++;
if (last_checked >= CHECK_LAST_NR)
last_checked = 0;
last_pages[last_checked] = address & 0xfffff000;
if (address < TASK_SIZE) {
printk("\n\rBAD!! KERNEL PAGE MISSING\n\r");
do_exit(SIGSEGV);
}
if (address - tsk->start_code >= TASK_SIZE) {
printk("Bad things happen: nonexistent page error in do_no_page\n\r");
do_exit(SIGSEGV);
}
page = get_empty((unsigned long *) ((address >> 20) & 0xffc));                // directory entry for this address; get_empty() installs a page there if needed and handles out-of-memory itself
/* locate the page-table entry for this address */
page &= 0xfffff000;
page += (address >> 10) & 0xffc;
tmp = *(unsigned long *) page;
/* entry already present: this handler should not have been called */
if (tmp & 1) {
printk("bogus do_no_page\n");
return;
}
++tsk->rss;
/* non-zero but not present: let swap_in() handle it -- presumably the page was swapped out; confirm in swap_in() */
if (tmp) {
++tsk->maj_flt;
swap_in((unsigned long *) page);
return;
}
/* round down to the start of the page; tmp = offset from start_code */
address &= 0xfffff000;
tmp = address - tsk->start_code;
inode = NULL;
block = 0;
if (tmp < tsk->end_data) {
inode = tsk->executable;
block = 1 + tmp / BLOCK_SIZE;
} else {
/* search the task's libraries, last one first, for one containing tmp */
i = tsk->numlibraries;
while (i-- > 0) {
if (tmp < tsk->libraries[i].start)
continue;
block = tmp - tsk->libraries[i].start;
if (block >= tsk->libraries[i].length)
continue;
inode = tsk->libraries[i].library;
block = 1 + block / BLOCK_SIZE;
break;
}
}
/* no backing file: map an empty page, then decide whether the access was legitimate */
if (!inode) {
++tsk->min_flt;
get_empty_page(address);
if (tsk != current)
return;
if (tmp >= LIBRARY_OFFSET || tmp < tsk->brk)
return;
/* within 8192 bytes below the stack pointer's page, or above it: accepted -- presumably stack growth; confirm */
if (tmp+8192 >= (user_esp & 0xfffff000))
return;
send_sig(SIGSEGV,tsk,1);
return;
}
/* sharing is only attempted when the fault is for the current task */
if (tsk == current)
if (share_page(inode,tmp)) {
++tsk->min_flt;
return;
}
++tsk->maj_flt;
if (!(page = get_free_page()))
oom();
/* look up four consecutive block numbers and read them into the new page */
for (i=0 ; i<4 ; block++,i++)
nr[i] = bmap(inode,block);
bread_page(page,inode->i_dev,nr);
/* bytes of this page past end_data; i is unsigned, so a value above 4095 (including wrap-around) disables the zeroing */
i = tmp + 4096 - tsk->end_data;
if (i>4095)
i = 0;
/* zero the tail of the page, walking backwards from its end */
tmp = page + 4096;
while (i--) {
tmp--;
*(char *)tmp = 0;
}
/* a non-zero result from put_page() means the page was mapped */
if (put_page(page,address))
return;
free_page(page);
oom();
}


之后的版本不再默认一级页表总是在地址0

越来越复杂了
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: