您的位置:首页 > 理论基础 > 计算机网络

Linux内核--网络栈实现分析(六)--应用层获取数据包(上)

2013-04-13 13:27 232 查看
本文分析基于内核Linux 1.2.13

原创作品,转载请标明http://blog.csdn.net/yming0221/article/details/7541907

更多请看专栏,地址http://blog.csdn.net/column/details/linux-kernel-net.html

作者:闫明

注:标题中的“(上)”、“(下)”表示分析过程基于数据包的传递方向:“(上)”表示分析是从底层向上分析,“(下)”表示分析是从上向下分析。

上篇博文分析了传输层从网络层获取数据包后将数据包缓存结构sk_buff挂载到特定的sock结构的接收队列中。

这里接着分析应用程序是如何从传输层获取网络数据包的。应用层要得到传输层的数据包有两种主要的方式:系统调用和文件操作。



系统调用:

Linux下用户程序通过系统调用从用户态进入内核态,调用内核功能来完成相应的服务。

网络栈的一些功能是通过系统调用sys_socketcall来完成的

具体的代码在net/socket.c中,该文件中的函数就相当于一个桥梁,在系统调用和内核网络栈之间。

[cpp] view
plaincopy

/*

* System call vectors. Since I (RIB) want to rewrite sockets as streams,

* we have this level of indirection. Not a lot of overhead, since more of

* the work is done via read/write/select directly.

*

* I'm now expanding this up to a higher level to separate the assorted

* kernel/user space manipulations and global assumptions from the protocol

* layers proper - AC.

*/

asmlinkage int sys_socketcall(int call, unsigned long *args)

{

int er;

switch(call)

{

case SYS_SOCKET:

er=verify_area(VERIFY_READ, args, 3 * sizeof(long));

if(er)

return er;

return(sock_socket(get_fs_long(args+0),

get_fs_long(args+1),

get_fs_long(args+2)));

case SYS_BIND:

er=verify_area(VERIFY_READ, args, 3 * sizeof(long));

if(er)

return er;

return(sock_bind(get_fs_long(args+0),

(struct sockaddr *)get_fs_long(args+1),

get_fs_long(args+2)));

case SYS_CONNECT:

er=verify_area(VERIFY_READ, args, 3 * sizeof(long));

if(er)

return er;

return(sock_connect(get_fs_long(args+0),

(struct sockaddr *)get_fs_long(args+1),

get_fs_long(args+2)));

case SYS_LISTEN:

er=verify_area(VERIFY_READ, args, 2 * sizeof(long));

if(er)

return er;

return(sock_listen(get_fs_long(args+0),

get_fs_long(args+1)));

case SYS_ACCEPT:

er=verify_area(VERIFY_READ, args, 3 * sizeof(long));

if(er)

return er;

return(sock_accept(get_fs_long(args+0),

(struct sockaddr *)get_fs_long(args+1),

(int *)get_fs_long(args+2)));

case SYS_GETSOCKNAME:

er=verify_area(VERIFY_READ, args, 3 * sizeof(long));

if(er)

return er;

return(sock_getsockname(get_fs_long(args+0),

(struct sockaddr *)get_fs_long(args+1),

(int *)get_fs_long(args+2)));

case SYS_GETPEERNAME:

er=verify_area(VERIFY_READ, args, 3 * sizeof(long));

if(er)

return er;

return(sock_getpeername(get_fs_long(args+0),

(struct sockaddr *)get_fs_long(args+1),

(int *)get_fs_long(args+2)));

case SYS_SOCKETPAIR:

er=verify_area(VERIFY_READ, args, 4 * sizeof(long));

if(er)

return er;

return(sock_socketpair(get_fs_long(args+0),

get_fs_long(args+1),

get_fs_long(args+2),

(unsigned long *)get_fs_long(args+3)));

case SYS_SEND:

er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long));

if(er)

return er;

return(sock_send(get_fs_long(args+0),

(void *)get_fs_long(args+1),

get_fs_long(args+2),

get_fs_long(args+3)));

case SYS_SENDTO:

er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long));

if(er)

return er;

return(sock_sendto(get_fs_long(args+0),

(void *)get_fs_long(args+1),

get_fs_long(args+2),

get_fs_long(args+3),

(struct sockaddr *)get_fs_long(args+4),

get_fs_long(args+5)));

case SYS_RECV:

er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long));

if(er)

return er;

return(sock_recv(get_fs_long(args+0),

(void *)get_fs_long(args+1),

get_fs_long(args+2),

get_fs_long(args+3)));

case SYS_RECVFROM:

er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long));

if(er)

return er;

return(sock_recvfrom(get_fs_long(args+0),

(void *)get_fs_long(args+1),

get_fs_long(args+2),

get_fs_long(args+3),

(struct sockaddr *)get_fs_long(args+4),

(int *)get_fs_long(args+5)));

case SYS_SHUTDOWN:

er=verify_area(VERIFY_READ, args, 2* sizeof(unsigned long));

if(er)

return er;

return(sock_shutdown(get_fs_long(args+0),

get_fs_long(args+1)));

case SYS_SETSOCKOPT:

er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long));

if(er)

return er;

return(sock_setsockopt(get_fs_long(args+0),

get_fs_long(args+1),

get_fs_long(args+2),

(char *)get_fs_long(args+3),

get_fs_long(args+4)));

case SYS_GETSOCKOPT:

er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long));

if(er)

return er;

return(sock_getsockopt(get_fs_long(args+0),

get_fs_long(args+1),

get_fs_long(args+2),

(char *)get_fs_long(args+3),

(int *)get_fs_long(args+4)));

default:

return(-EINVAL);

}

}

上面系统调用的宏定义如下:

[cpp] view
plaincopy

/*
 * Operation codes for the multiplexed socket system call.
 * sys_socketcall() switches on these values in its `call` argument; the
 * trailing comment on each line names the socket operation the code
 * stands for.
 */

#define SYS_SOCKET 1 /* sys_socket(2) */

#define SYS_BIND 2 /* sys_bind(2) */

#define SYS_CONNECT 3 /* sys_connect(2) */

#define SYS_LISTEN 4 /* sys_listen(2) */

#define SYS_ACCEPT 5 /* sys_accept(2) */

#define SYS_GETSOCKNAME 6 /* sys_getsockname(2) */

#define SYS_GETPEERNAME 7 /* sys_getpeername(2) */

#define SYS_SOCKETPAIR 8 /* sys_socketpair(2) */

#define SYS_SEND 9 /* sys_send(2) */

#define SYS_RECV 10 /* sys_recv(2) */

#define SYS_SENDTO 11 /* sys_sendto(2) */

#define SYS_RECVFROM 12 /* sys_recvfrom(2) */

#define SYS_SHUTDOWN 13 /* sys_shutdown(2) */

#define SYS_SETSOCKOPT 14 /* sys_setsockopt(2) */

#define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */

应用层在一系列操作后就可以通过参数SYS_RECV或SYS_RECVFROM来获取数据包。由于UDP是无连接的,所以如果需要回复,必须使用recvfrom才能得知是谁发送的数据包。当然UDP也可以用recv类函数,只是它不能回复,只能接收。

这里还是以INET中UDP来举例说明。

如果系统调用参数是SYS_RECVFROM,则会在进行内存校验后执行sock_recvfrom()函数。

[cpp] view
plaincopy

/*
 *	Receive a frame from the socket behind descriptor fd and, if the
 *	caller asked for it, report the sender's address.
 *
 *	fd		descriptor to receive on.
 *	buff, len	user buffer and its size; it is checked for
 *			writability before the protocol is called.
 *	flags		receive flags, passed through to the protocol.
 *	addr, addr_len	where to store the sender address in user space;
 *			addr may be NULL, in which case no address is copied.
 *
 *	Returns -EBADF for a bad descriptor, -ENOTSOCK if it is not a socket,
 *	-EINVAL for a negative len, 0 for a zero len, the verify_area() or
 *	move_addr_to_user() error on failure, a negative value from the
 *	protocol's recvfrom handler on error, otherwise the value that
 *	handler returned.
 */
static int sock_recvfrom(int fd, void * buff, int len, unsigned flags,
	     struct sockaddr *addr, int *addr_len)
{
	char kaddr[MAX_SOCK_ADDR];	/* kernel-side copy of the sender address */
	struct socket *sock;
	struct file *file;
	int kaddr_len;
	int err;

	if (fd < 0 || fd >= NR_OPEN)
		return -EBADF;
	file = current->files->fd[fd];
	if (file == NULL)
		return -EBADF;

	sock = sockfd_lookup(fd, NULL);
	if (sock == NULL)
		return -ENOTSOCK;

	if (len < 0)
		return -EINVAL;
	if (len == 0)
		return 0;

	err = verify_area(VERIFY_WRITE, buff, len);
	if (err)
		return err;

	/*
	 * Checks done: hand over to the lower layer. For the INET domain
	 * this is inet_recvfrom().
	 */
	len = sock->ops->recvfrom(sock, buff, len, (file->f_flags & O_NONBLOCK),
				  flags, (struct sockaddr *)kaddr, &kaddr_len);
	if (len < 0)
		return len;

	/* Copy the sender's address from kernel space to user space. */
	if (addr != NULL) {
		err = move_addr_to_user(kaddr, kaddr_len, addr, addr_len);
		if (err < 0)
			return err;
	}

	return len;
}

在inet_recvfrom()函数中会调用具体的协议操作函数。UDP的协议操作函数定义如下:

[cpp] view
plaincopy

/*
 * Protocol operations table for UDP.
 *
 * The initializer is positional: every entry must stay in the order in
 * which struct proto declares its members (the declaration is not visible
 * here), so do not reorder, insert or drop entries. NULL entries leave the
 * corresponding slot unset.
 *
 * NOTE(review): the trailing non-pointer entries (128, 0, {NULL,}, "UDP",
 * 0, 0) are presumably limits/state/name fields of struct proto - confirm
 * against the struct declaration.
 */
struct proto udp_prot = {

sock_wmalloc,

sock_rmalloc,

sock_wfree,

sock_rfree,

sock_rspace,

sock_wspace,

udp_close,

udp_read,

udp_write,

udp_sendto,

udp_recvfrom,

ip_build_header,

udp_connect,

NULL,

ip_queue_xmit,

NULL,

NULL,

NULL,

udp_rcv,

datagram_select,

udp_ioctl,

NULL,

NULL,

ip_setsockopt,

ip_getsockopt,

128,

0,

{NULL,},

"UDP",

0, 0

};

可以看到,其对应的函数是udp_recvfrom():

[cpp] view
plaincopy

/*
 *	Receive one UDP datagram: if one is queued it is returned, otherwise
 *	skb_recv_datagram() decides whether to wait or fail (noblock/flags
 *	are passed straight through to it).
 *
 *	sk		socket to receive on.
 *	to, len		destination buffer and its size.
 *	noblock, flags	passed through to skb_recv_datagram().
 *	sin		if non-NULL, filled in with the sender's address.
 *	addr_len	if non-NULL, set to sizeof(*sin).
 *
 *	Returns the error reported by skb_recv_datagram() when no buffer is
 *	obtained; otherwise skb->len of the received buffer. Note that this
 *	is the buffer's full length even when fewer bytes (at most len) were
 *	copied into `to`.
 */
int udp_recvfrom(struct sock *sk, unsigned char *to, int len,
	     int noblock, unsigned flags, struct sockaddr_in *sin,
	     int *addr_len)
{
	struct sk_buff *skb;
	int dgram_len;
	int ncopy;
	int err;

	/* Tell the caller how large the address we report is. */
	if (addr_len != NULL)
		*addr_len = sizeof(*sin);

	/* The generic datagram code finds (or waits for) the buffer. */
	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (skb == NULL)
		return err;

	dgram_len = skb->len;
	ncopy = min(len, dgram_len);

	/*
	 * Take the data part out of the sk_buff, skipping a fixed-size UDP
	 * header.
	 * FIXME : should use udp header size info value
	 */
	skb_copy_datagram(skb, sizeof(struct udphdr), to, ncopy);
	sk->stamp = skb->stamp;

	/*
	 * Report the sender.
	 * NOTE(review): the address is read from skb->daddr, not saddr;
	 * presumably the receive path stores the addresses swapped - confirm.
	 */
	if (sin != NULL) {
		sin->sin_family = AF_INET;
		sin->sin_port = skb->h.uh->source;
		sin->sin_addr.s_addr = skb->daddr;
	}

	skb_free_datagram(skb);
	release_sock(sk);
	return dgram_len;
}

这样数据就到达了用户空间。

普通文件操作函数接口

最主要的函数就是读写函数:sock_read和sock_write,可以通过文件操作来完成网络数据的读写。谈到文件,就得有文件描述符,文件描述符中的f_inode指针指向文件的存储结点结构。

文件操作集定义如下:

[cpp] view
plaincopy

/*
 * File operations table for sockets: these handlers are what the ordinary
 * file interface (read, write, select, ioctl, close, ...) ends up calling
 * on a socket.
 *
 * The initializer is positional, so entries must stay in the order in which
 * struct file_operations declares its members (declaration not visible
 * here). NULL entries are operations sockets do not provide.
 */
static struct file_operations socket_file_ops = {

sock_lseek,

sock_read,

sock_write,

sock_readdir,

sock_select,

sock_ioctl,

NULL, /* mmap */

NULL, /* no special open code... */

sock_close,

NULL, /* no fsync */

sock_fasync

};

read函数和write函数与recvfrom和send类似,这里列出函数,方便查看。

[cpp] view
plaincopy

/*

* Read data from a socket. ubuf is a user mode pointer. We make sure the user

* area ubuf...ubuf+size-1 is writable before asking the protocol.

*/

static int sock_read(struct inode *inode, struct file *file, char *ubuf, int size)

{

struct socket *sock;

int err;

if (!(sock = socki_lookup(inode)))

{

printk("NET: sock_read: can't find socket for inode!\n");

return(-EBADF);

}

if (sock->flags & SO_ACCEPTCON)

return(-EINVAL);

if(size<0)

return -EINVAL;

if(size==0)

return 0;

if ((err=verify_area(VERIFY_WRITE,ubuf,size))<0)

return err;

return(sock->ops->read(sock, ubuf, size, (file->f_flags & O_NONBLOCK)));//和recvfrom函数类似,调用INET域相应函数

}

上面会调用inet_read()函数,inet_read()函数会调用udp_read()函数,而udp_read()是通过调用udp_recvfrom()完成功能的。

这两种方式是内核网络栈对用户的接口。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐