您的位置:首页 > 理论基础 > 计算机网络

Linux网络协议栈(一) -- socket入门

2013-08-09 16:14 519 查看
原文地址:(一) http://www.cnblogs.com/hustcat/archive/2009/09/17/1568738.html

(二)http://www.cnblogs.com/hustcat/archive/2009/09/17/1568765.html

1、TCP/IP参考模型

为了实现各种网络的互连,国际标准化组织(ISO)制定了开放式系统互连(OSI)参考模型。尽管OSI的体系结构从理论上讲是比较完整的,但实际上,完全符合OSI各层协议的商用产品却很少进入市场。而使用TCP/IP协议的产品却大量涌入市场,几乎所有的工作站都配有TCP/IP协议,使得TCP/IP成为计算机网络事实上的国际标准。



2、套接字(socket)

socket是操作系统的重要组成部分之一,它是网络应用程序的基础。从层次上来说,它位于应用层,是操作系统为应用程序员提供的API,通过它,应用程序可以访问传输层协议。

1、socket 位于传输层协议之上,屏蔽了不同网络协议之间的差异;

2、socket是网络编程的入口,它提供了大量的系统调用,构成了网络程序的主体;

3、在Linux系统中,socket属于文件系统的一部分,网络通信可以被看作是对文件的读取,使得我们对网络的控制和对文件的控制一样方便。



2.1、套接字地址

在传输层上,通信端点可由Internet上3个参数描述:所用的协议、IP地址和端口号。这些内容由sockaddr描述:



//usr/include/sys/socket.h

/* Type used to hold an address-family identifier (AF_xxx). */
typedef unsigned short sa_family_t;

//通用socket地址

/*
 * Generic socket address: an address-family tag followed by
 * family-specific address bytes.
 */
struct sockaddr {
	sa_family_t	sa_family;	/* address family (protocol family), AF_xxx */
	char		sa_data[14];	/* 14 bytes of protocol address */
};

//usr/include/netinet/in.h

//INET地址簇的socket地址

/* Internet (IP) address, held as a single 32-bit value. */
struct in_addr {
	__u32	s_addr;
};

/*
 * Socket address for the INET address family. The trailing sin_zero
 * member pads the structure out to the size of struct sockaddr.
 */
struct sockaddr_in {
	sa_family_t		sin_family;	/* address family: AF_INET */
	unsigned short int	sin_port;	/* port number */
	struct in_addr		sin_addr;	/* Internet (IP) address */

	/* Pad to size of 'struct sockaddr'. */
	unsigned char		sin_zero[sizeof(struct sockaddr)
					 - sizeof(sa_family_t)
					 - sizeof(uint16_t)
					 - sizeof(struct in_addr)];
};




Linux 支持的套接字地址族:

套接字地址族
描述
UNIX
UNIX 域套接字
INET
通过 TCP/IP 协议支持的 Internet 地址族
AX25
Amateur radio X.25
APPLETALK
Appletalk DDP
IPX
Novell IPX
X25
X25
Linux 所支持的BSD套接字类型:

BSD 套接字类型
描述
流(stream)
这种套接字提供了可靠的双向顺序数据流,可保证数据不会在传输过程中丢失、破坏或重复出现。流套接字通过 INET 地址族的 TCP 协议实现。
数据报(datagram)
这种套接字也提供双向的数据传输,但是并不对数据的传输提供担保,也就是说,数据可能会以错误的顺序传递,甚至丢失或破坏。这种类型的套接字通过 INET 地址族的 UDP 协议实现。
原始(raw)
利用这种类型的套接字,进程可以直接访问底层协议(因此称为原始)。例如,可在某个以太网设备上打开原始套接字,然后获取原始的 IP 数据传输信息。
可靠发送的消息
和数据报套接字类似,但保证数据被正确传输到目的端。
顺序数据包
和流套接字类似,但数据包大小是固定的。
数据包(packet)
这并不是标准的 BSD 套接字类型,它是 Linux 专有的 BSD 套接字扩展,可允许进程直接在设备级访问数据包。
2.2、套接字操作

套接字(更确切的说是BSD套接字)为应用程序提供了基本的API,这些API是编写网络应用程序的基础。



3、套接字的实现

套接字最先是在UNIX的BSD版本实现的,所以也叫做BSD套接字,它隐藏了各个协议之间的差异,并向上提供统一的接口。Linux中实现套接字的基本结构:



3.1、BSD套接字

3.1.1、核心数据结构

为了实现BSD套接字,内核提供一个重要的数据结构struct socket,它的定义如下:

//BSD套接字(include/linux/net.h)
/*
 * BSD-level socket object. It links the generic socket state to the
 * file object used to reach it and to the protocol-family-specific
 * struct sock behind it.
 */
struct socket {
	socket_state		state;		/* socket state (see socket_state) */
	unsigned long		flags;
	struct proto_ops	*ops;		/* table of socket operations */
	struct fasync_struct	*fasync_list;
	struct file		*file;		/* file object tying this socket to its inode */
	struct sock		*sk;		/* internal socket, specific to the protocol family (e.g. PF_INET) */
	wait_queue_head_t	wait;
	short			type;		/* socket type: SOCK_STREAM, SOCK_DGRAM, SOCK_RAW,
						 * SOCK_RDM, SOCK_SEQPACKET or SOCK_PACKET */
	unsigned char		passcred;
};

//BSD套接字操作函数集

/*
 * Operations table for a BSD socket. One instance exists per
 * family/type combination (e.g. inet_stream_ops); struct socket
 * points at it through its ops member.
 */
struct proto_ops {
	int		family;
	struct module	*owner;

	int		(*release)(struct socket *sock);
	int		(*bind)(struct socket *sock, struct sockaddr *myaddr,
				int sockaddr_len);
	int		(*connect)(struct socket *sock, struct sockaddr *vaddr,
				   int sockaddr_len, int flags);
	int		(*socketpair)(struct socket *sock1, struct socket *sock2);
	int		(*accept)(struct socket *sock, struct socket *newsock,
				  int flags);
	int		(*getname)(struct socket *sock, struct sockaddr *addr,
				   int *sockaddr_len, int peer);
	unsigned int	(*poll)(struct file *file, struct socket *sock,
				struct poll_table_struct *wait);
	int		(*ioctl)(struct socket *sock, unsigned int cmd,
				 unsigned long arg);
	int		(*listen)(struct socket *sock, int len);
	int		(*shutdown)(struct socket *sock, int flags);
	int		(*setsockopt)(struct socket *sock, int level, int optname,
				      char __user *optval, int optlen);
	int		(*getsockopt)(struct socket *sock, int level, int optname,
				      char __user *optval, int __user *optlen);
	int		(*sendmsg)(struct kiocb *iocb, struct socket *sock,
				   struct msghdr *m, size_t total_len);
	int		(*recvmsg)(struct kiocb *iocb, struct socket *sock,
				   struct msghdr *m, size_t total_len, int flags);
	int		(*mmap)(struct file *file, struct socket *sock,
				struct vm_area_struct *vma);
	ssize_t		(*sendpage)(struct socket *sock, struct page *page,
				    int offset, size_t size, int flags);
};

//BSD套接字状态

/* Connection state of a BSD socket; values are consecutive from 0. */
typedef enum {
	SS_FREE          = 0,	/* not allocated */
	SS_UNCONNECTED   = 1,	/* unconnected to any socket */
	SS_CONNECTING    = 2,	/* in process of connecting */
	SS_CONNECTED     = 3,	/* connected to socket */
	SS_DISCONNECTING = 4	/* in process of disconnecting */
} socket_state;
3.1.2、BSD套接字初始化

//net/socket.c
//BSD套接字的初始化

/*
 * Initialise the BSD socket layer: clear the protocol-family table,
 * set up the sock (and optionally skbuff) caches, then register and
 * mount the sockfs filesystem.
 */
void __init sock_init(void)
{
	int family;

	/* Initialize all address (protocol) families: every slot starts empty. */
	for (family = 0; family < NPROTO; ++family) {
		net_families[family] = NULL;
	}

	/* Initialize sock SLAB cache. */
	sk_init();

#ifdef SLAB_SKB
	/* Initialize skbuff SLAB cache. */
	skb_init();
#endif

	/* Initialize the protocols module. */
	init_inodecache();

	/* Register the sockfs filesystem type, then mount it. */
	register_filesystem(&sock_fs_type);
	sock_mnt = kern_mount(&sock_fs_type);

	/*
	 * The real protocol initialization is performed when
	 * do_initcalls is run.
	 */

#ifdef CONFIG_NETFILTER
	netfilter_init();
#endif
}

//net/socket.c

//sockfs文件系统的安装点

/* Mount of the sockfs filesystem; assigned from kern_mount() in sock_init(). */
static struct vfsmount *sock_mnt;

//sockfs文件系统类型

/* Filesystem type descriptor for sockfs. */
static struct file_system_type sock_fs_type = {
	.name		= "sockfs",
	.get_sb		= sockfs_get_sb,
	.kill_sb	= kill_anon_super,
};

//地址簇及协议信息

/*
 * Registered address (protocol) families, indexed by family number.
 * A NULL slot means no handler is registered for that family.
 */
static struct net_proto_family *net_families[NPROTO];
sock_init在系统初始化时被调用:



3.1.3、BSD套接字的系统调用

实际上,在i386等体系结构上,Linux内核只提供了一个与套接字相关的系统调用,即sys_socketcall,应用程序的所有套接字调用都会映射到这个系统调用上。



//BSD套接字调用入口(net/socket.c)

/*
 * Single entry point for the BSD socket calls.
 *
 * @call: selects the operation (SYS_SOCKET ... SYS_RECVMSG).
 * @args: user-space array holding the arguments of that operation.
 *
 * Copies the argument words from user space and dispatches to the
 * matching sys_*() routine. Returns that routine's result, -EINVAL
 * for an unknown call number, or -EFAULT if the arguments cannot be
 * copied.
 */
asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long uargs[6];
	unsigned long arg0, arg1;
	int ret;

	if (call < 1 || call > SYS_RECVMSG)
		return -EINVAL;

	/*
	 * copy_from_user should be SMP safe.
	 * nargs[call] is used as the copy length for this call.
	 */
	if (copy_from_user(uargs, args, nargs[call]))
		return -EFAULT;

	arg0 = uargs[0];
	arg1 = uargs[1];

	/* Dispatch to the handler for the requested operation. */
	switch (call) {
	case SYS_SOCKET:
		ret = sys_socket(arg0, arg1, uargs[2]);
		break;
	case SYS_BIND:
		ret = sys_bind(arg0, (struct sockaddr __user *)arg1, uargs[2]);
		break;
	case SYS_CONNECT:
		ret = sys_connect(arg0, (struct sockaddr __user *)arg1, uargs[2]);
		break;
	case SYS_LISTEN:
		ret = sys_listen(arg0, arg1);
		break;
	case SYS_ACCEPT:
		ret = sys_accept(arg0, (struct sockaddr __user *)arg1,
				 (int __user *)uargs[2]);
		break;
	case SYS_GETSOCKNAME:
		ret = sys_getsockname(arg0, (struct sockaddr __user *)arg1,
				      (int __user *)uargs[2]);
		break;
	case SYS_GETPEERNAME:
		ret = sys_getpeername(arg0, (struct sockaddr __user *)arg1,
				      (int __user *)uargs[2]);
		break;
	case SYS_SOCKETPAIR:
		ret = sys_socketpair(arg0, arg1, uargs[2], (int __user *)uargs[3]);
		break;
	case SYS_SEND:
		ret = sys_send(arg0, (void __user *)arg1, uargs[2], uargs[3]);
		break;
	case SYS_SENDTO:
		ret = sys_sendto(arg0, (void __user *)arg1, uargs[2], uargs[3],
				 (struct sockaddr __user *)uargs[4], uargs[5]);
		break;
	case SYS_RECV:
		ret = sys_recv(arg0, (void __user *)arg1, uargs[2], uargs[3]);
		break;
	case SYS_RECVFROM:
		ret = sys_recvfrom(arg0, (void __user *)arg1, uargs[2], uargs[3],
				   (struct sockaddr __user *)uargs[4],
				   (int __user *)uargs[5]);
		break;
	case SYS_SHUTDOWN:
		ret = sys_shutdown(arg0, arg1);
		break;
	case SYS_SETSOCKOPT:
		ret = sys_setsockopt(arg0, arg1, uargs[2],
				     (char __user *)uargs[3], uargs[4]);
		break;
	case SYS_GETSOCKOPT:
		ret = sys_getsockopt(arg0, arg1, uargs[2],
				     (char __user *)uargs[3],
				     (int __user *)uargs[4]);
		break;
	case SYS_SENDMSG:
		ret = sys_sendmsg(arg0, (struct msghdr __user *)arg1, uargs[2]);
		break;
	case SYS_RECVMSG:
		ret = sys_recvmsg(arg0, (struct msghdr __user *)arg1, uargs[2]);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

//include/asm/unistd.h

#define __NR_socketcall 102 // system call number

下面来看一下sys_socket的实现:

//net/socket.c

/*
 * Create a socket.
 *
 * First builds a struct socket via sock_create(), then "maps" it onto
 * an open file with sock_map_fd(). Returns the value produced by
 * sock_map_fd() on success or a negative error code; if the mapping
 * step fails the freshly created socket is released again.
 */
asmlinkage long sys_socket(int family, int type, int protocol)
{
	struct socket *sock;
	int retval;

	/* Create the socket object. */
	retval = sock_create(family, type, protocol, &sock);
	if (retval >= 0) {
		/* Map the socket to a file descriptor. */
		retval = sock_map_fd(sock);
		if (retval < 0)
			sock_release(sock);
	}

	/* It may be already another descriptor 8) Not kernel problem. */
	return retval;
}

/*
 * Create a socket of the given family/type/protocol and store it in
 * *res. Thin wrapper around __sock_create() that passes kern = 0.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}

/*
 * Allocate a struct socket and let the requested protocol family
 * initialise it.
 *
 * @family:   address family; must satisfy 0 <= family < NPROTO.
 * @type:     socket type; must satisfy 0 <= type < SOCK_MAX.
 * @protocol: passed through to the family's ->create().
 * @res:      receives the new socket on success; left untouched on failure.
 * @kern:     passed through to the security hooks.
 *
 * Returns the (non-negative) result of the family's ->create() on
 * success, or a negative error code.
 */
static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
{
	int err;
	struct socket *sock;

	/*
	 *	Check protocol is in range
	 */
	if (family < 0 || family >= NPROTO)
		return -EAFNOSUPPORT;
	if (type < 0 || type >= SOCK_MAX)
		return -EINVAL;

	/* Compatibility.

	   This uglymoron is moved from INET layer to here to avoid
	   deadlock in module load.
	 */
	if (family == PF_INET && type == SOCK_PACKET) {
		static int warned;

		if (!warned) {
			warned = 1;
			printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
		}
		family = PF_PACKET;
	}

	err = security_socket_create(family, type, protocol, kern);
	if (err)
		return err;

#if defined(CONFIG_KMOD)
	/* Attempt to load a protocol module if the find failed.
	 *
	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
	 * requested real, full-featured networking support upon configuration.
	 * Otherwise module support will break!
	 */
	if (net_families[family] == NULL) {
		request_module("net-pf-%d", family);
	}
#endif

	net_family_read_lock();
	if (net_families[family] == NULL) {
		err = -EAFNOSUPPORT;
		goto out;
	}

	/*
	 *	Allocate the socket and allow the family to set things up. if
	 *	the protocol is 0, the family is instructed to select an appropriate
	 *	default.
	 */
	sock = sock_alloc();
	if (!sock) {
		printk(KERN_WARNING "socket: no more sockets\n");
		err = -ENFILE;		/* Not exactly a match, but its the
					   closest posix thing */
		goto out;
	}

	sock->type = type;

	/*
	 * We will call the ->create function, that possibly is in a loadable
	 * module, so we have to bump that loadable module refcnt first.
	 */
	err = -EAFNOSUPPORT;
	if (!try_module_get(net_families[family]->owner))
		goto out_release;

	/* Hand over to the protocol family's create routine. */
	err = net_families[family]->create(sock, protocol);
	if (err < 0)
		goto out_module_put;

	/*
	 * Now to bump the refcnt of the [loadable] module that owns this
	 * socket at sock_release time we decrement its refcnt.
	 */
	if (!try_module_get(sock->ops->owner)) {
		sock->ops = NULL;
		/*
		 * err still holds the non-negative result of ->create() here.
		 * Without an explicit error the caller would see success even
		 * though the socket is released below and *res was never set.
		 */
		err = -EAFNOSUPPORT;
		goto out_module_put;
	}

	/*
	 * Now that we're done with the ->create function, the [loadable]
	 * module can have its refcnt decremented
	 */
	module_put(net_families[family]->owner);
	*res = sock;
	security_socket_post_create(sock, family, type, protocol, kern);

out:
	net_family_read_unlock();
	return err;
out_module_put:
	module_put(net_families[family]->owner);
out_release:
	sock_release(sock);
	goto out;
}

///////////////////////////////////////////////////////////

int sock_map_fd(struct socket *sock)

{

int fd;

struct qstr this;

char name[32];

/*

* Find a file descriptor suitable for return to the user.

*/

//分配一个没有使用的描述符

fd = get_unused_fd();

if (fd >= 0) {

struct file *file = get_empty_filp();

if (!file) {

put_unused_fd(fd);

fd = -ENFILE;

goto out;

}

sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);

this.name = name;

this.len = strlen(name);

this.hash = SOCK_INODE(sock)->i_ino;

//从sockfs文件系统中分配一个目录项对象

file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);

if (!file->f_dentry) {

put_filp(file);

put_unused_fd(fd);

fd = -ENOMEM;

goto out;

}

file->f_dentry->d_op = &sockfs_dentry_operations;

//将目录项对象与sock的索引节点关联起来

d_add(file->f_dentry, SOCK_INODE(sock));

file->f_vfsmnt = mntget(sock_mnt);

file->f_mapping = file->f_dentry->d_inode->i_mapping;

//设置sock对应的文件对象

sock->file = file;

//设置文件对象的操作函数

file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;

file->f_mode = FMODE_READ | FMODE_WRITE;

file->f_flags = O_RDWR;

file->f_pos = 0;

fd_install(fd, file);

}

out:

return fd;

}

3.2、INET套接字

INET套接字就是支持 Internet 地址族的套接字,它位于TCP协议之上, BSD套接字之下,如下:



3.2.1、数据结构

//include/net/sock.h

//与特定协议相关的socket

/*
 * Protocol-specific socket representation that sits behind a
 * struct socket. Field order is significant and is kept exactly
 * as in the original definition.
 */
struct sock {
	/*
	 * Now struct tcp_tw_bucket also uses sock_common, so please just
	 * don't add nothing before this first member (__sk_common) --acme
	 */
	struct sock_common	__sk_common;
	/* Shorthand accessors for the members of __sk_common. */
#define sk_family		__sk_common.skc_family
#define sk_state		__sk_common.skc_state
#define sk_reuse		__sk_common.skc_reuse
#define sk_bound_dev_if		__sk_common.skc_bound_dev_if
#define sk_node			__sk_common.skc_node
#define sk_bind_node		__sk_common.skc_bind_node
#define sk_refcnt		__sk_common.skc_refcnt
	volatile unsigned char	sk_zapped;
	unsigned char		sk_shutdown;
	unsigned char		sk_use_write_queue;
	unsigned char		sk_userlocks;
	socket_lock_t		sk_lock;
	int			sk_rcvbuf;
	wait_queue_head_t	*sk_sleep;
	struct dst_entry	*sk_dst_cache;
	rwlock_t		sk_dst_lock;
	struct xfrm_policy	*sk_policy[2];
	atomic_t		sk_rmem_alloc;
	struct sk_buff_head	sk_receive_queue;
	atomic_t		sk_wmem_alloc;
	struct sk_buff_head	sk_write_queue;
	atomic_t		sk_omem_alloc;
	int			sk_wmem_queued;
	int			sk_forward_alloc;
	unsigned int		sk_allocation;
	int			sk_sndbuf;
	unsigned long		sk_flags;
	char			sk_no_check;
	unsigned char		sk_debug;
	unsigned char		sk_rcvtstamp;
	unsigned char		sk_no_largesend;
	int			sk_route_caps;
	unsigned long		sk_lingertime;
	int			sk_hashent;
	/*
	 * The backlog queue is special, it is always used with
	 * the per-socket spinlock held and requires low latency
	 * access.  Therefore we special case it's implementation.
	 */
	struct {
		struct sk_buff	*head;
		struct sk_buff	*tail;
	} sk_backlog;
	rwlock_t		sk_callback_lock;
	struct sk_buff_head	sk_error_queue;
	struct proto		*sk_prot;	/* operations of the underlying protocol */
	int			sk_err;
	int			sk_err_soft;
	unsigned short		sk_ack_backlog;
	unsigned short		sk_max_ack_backlog;
	__u32			sk_priority;
	unsigned short		sk_type;
	unsigned char		sk_localroute;
	unsigned char		sk_protocol;
	struct ucred		sk_peercred;
	int			sk_rcvlowat;
	long			sk_rcvtimeo;
	long			sk_sndtimeo;
	struct sk_filter	*sk_filter;
	void			*sk_protinfo;
	kmem_cache_t		*sk_slab;
	struct timer_list	sk_timer;
	struct timeval		sk_stamp;
	struct socket		*sk_socket;
	void			*sk_user_data;
	struct module		*sk_owner;
	struct page		*sk_sndmsg_page;
	__u32			sk_sndmsg_off;
	struct sk_buff		*sk_send_head;
	int			sk_write_pending;
	void			*sk_security;
	__u8			sk_queue_shrunk;
	/* three bytes hole, try to pack */

	/* Callbacks. */
	void			(*sk_state_change)(struct sock *sk);
	void			(*sk_data_ready)(struct sock *sk, int bytes);
	void			(*sk_write_space)(struct sock *sk);
	void			(*sk_error_report)(struct sock *sk);
	int			(*sk_backlog_rcv)(struct sock *sk,
						  struct sk_buff *skb);
	void			(*sk_destruct)(struct sock *sk);
};

//底层协议的操作函数

/*
 * Operations and bookkeeping of an underlying protocol (tcp_prot is
 * an instance). struct sock refers to one of these through sk_prot.
 */
struct proto {
	void		(*close)(struct sock *sk, long timeout);
	int		(*connect)(struct sock *sk, struct sockaddr *uaddr,
				   int addr_len);
	int		(*disconnect)(struct sock *sk, int flags);
	struct sock *	(*accept)(struct sock *sk, int flags, int *err);
	int		(*ioctl)(struct sock *sk, int cmd, unsigned long arg);
	int		(*init)(struct sock *sk);
	int		(*destroy)(struct sock *sk);
	void		(*shutdown)(struct sock *sk, int how);
	int		(*setsockopt)(struct sock *sk, int level, int optname,
				      char __user *optval, int optlen);
	int		(*getsockopt)(struct sock *sk, int level, int optname,
				      char __user *optval, int __user *option);
	int		(*sendmsg)(struct kiocb *iocb, struct sock *sk,
				   struct msghdr *msg, size_t len);
	int		(*recvmsg)(struct kiocb *iocb, struct sock *sk,
				   struct msghdr *msg, size_t len, int noblock,
				   int flags, int *addr_len);
	int		(*sendpage)(struct sock *sk, struct page *page,
				    int offset, size_t size, int flags);
	int		(*bind)(struct sock *sk, struct sockaddr *uaddr,
				int addr_len);
	int		(*backlog_rcv)(struct sock *sk, struct sk_buff *skb);

	/* Keeping track of sk's, looking them up, and port selection methods. */
	void		(*hash)(struct sock *sk);
	void		(*unhash)(struct sock *sk);
	int		(*get_port)(struct sock *sk, unsigned short snum);

	/* Memory pressure */
	void		(*enter_memory_pressure)(void);
	atomic_t	*memory_allocated;	/* Current allocated memory. */
	atomic_t	*sockets_allocated;	/* Current number of sockets. */
	/*
	 * Pressure flag: try to collapse.
	 * Technical note: it is used by multiple contexts non atomically.
	 * All the sk_stream_mem_schedule() is of this nature: accounting
	 * is strict, actions are advisory and have some latency.
	 */
	int		*memory_pressure;
	int		*sysctl_mem;
	int		*sysctl_wmem;
	int		*sysctl_rmem;
	int		max_header;

	kmem_cache_t	*slab;
	int		slab_obj_size;

	struct module	*owner;

	char		name[32];

	struct {
		int	inuse;
		u8	__pad[SMP_CACHE_BYTES - sizeof(int)];
	} stats[NR_CPUS];
};

inet_init()函数:

//net/ipv4/af_inet.c

/*
 * Called once during system initialisation.
 * Call path: start_kernel() --> init() --> do_basic_setup() --> do_initcalls() --> inet_init()
 *
 * Sets up the INET protocol family: allocates the sock slab caches for
 * TCP, UDP and RAW, registers the family with the socket layer, adds
 * the base protocols, fills the inetsw lists and initialises the
 * ARP/IP/TCP/ICMP modules.
 *
 * Returns 0 on success, -EINVAL if struct inet_skb_parm does not fit
 * in the skb control buffer, or the error from sk_alloc_slab(). On a
 * slab allocation failure the slabs that were already allocated are
 * freed again, in reverse order of allocation.
 */
static int __init inet_init(void)
{
	struct sk_buff *dummy_skb;
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) {
		printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
		goto out;
	}

	rc = sk_alloc_slab(&tcp_prot, "tcp_sock");
	if (rc) {
		sk_alloc_slab_error(&tcp_prot);
		goto out;
	}
	rc = sk_alloc_slab(&udp_prot, "udp_sock");
	if (rc) {
		sk_alloc_slab_error(&udp_prot);
		goto out_tcp_free_slab;
	}
	rc = sk_alloc_slab(&raw_prot, "raw_sock");
	if (rc) {
		sk_alloc_slab_error(&raw_prot);
		goto out_udp_free_slab;
	}

	/*
	 *	Tell SOCKET that we are alive: register the Internet
	 *	protocol family.
	 */
	(void)sock_register(&inet_family_ops);

	/*
	 *	Add all the base protocols.
	 */
	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
#endif

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	/* Register every entry of inetsw_array. */
	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */
	arp_init();

	/*
	 *	Set the IP module up
	 */
	ip_init();

	tcp_v4_init(&inet_family_ops);

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/*
	 *	Set the ICMP layer up
	 */
	icmp_init(&inet_family_ops);

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	ip_mr_init();
#endif

	/*
	 *	Initialise per-cpu ipv4 mibs
	 */
	if (init_ipv4_mibs())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");

	ipv4_proc_init();

	ipfrag_init();

	rc = 0;
out:
	return rc;

	/*
	 * Unwind in reverse order of allocation. A RAW failure enters at
	 * out_udp_free_slab and frees UDP then TCP; a UDP failure enters at
	 * out_tcp_free_slab and frees only TCP.
	 */
out_udp_free_slab:
	sk_free_slab(&udp_prot);
out_tcp_free_slab:
	sk_free_slab(&tcp_prot);
	goto out;
}

//net/ipv4/af_inet.c

//INET协议簇信息

/* INET protocol family descriptor; inet_init() hands it to sock_register(). */
static struct net_proto_family inet_family_ops = {
	.family	= PF_INET,
	.create	= inet_create,
	.owner	= THIS_MODULE,
};

/*
 * One list head per socket type. inet_init() initialises every head
 * and inet_create() walks inetsw[sock->type] to find a matching
 * struct inet_protosw.
 */
static struct list_head inetsw[SOCK_MAX];

//该数组中的所有元素都会插入到inetsw的链表中

/*
 * Built-in type/protocol combinations of the INET family. inet_init()
 * passes every element to inet_register_protosw().
 */
static struct inet_protosw inetsw_array[] = {
	{
		.type		= SOCK_STREAM,
		.protocol	= IPPROTO_TCP,
		.prot		= &tcp_prot,
		.ops		= &inet_stream_ops,
		.capability	= -1,
		.no_check	= 0,
		.flags		= INET_PROTOSW_PERMANENT,
	},
	{
		.type		= SOCK_DGRAM,
		.protocol	= IPPROTO_UDP,
		.prot		= &udp_prot,
		.ops		= &inet_dgram_ops,
		.capability	= -1,
		.no_check	= UDP_CSUM_DEFAULT,
		.flags		= INET_PROTOSW_PERMANENT,
	},
	{
		.type		= SOCK_RAW,
		.protocol	= IPPROTO_IP,	/* wild card */
		.prot		= &raw_prot,
		.ops		= &inet_sockraw_ops,
		.capability	= CAP_NET_RAW,
		.no_check	= UDP_CSUM_DEFAULT,
		.flags		= INET_PROTOSW_REUSE,
	}
};

//流套接字操作函数

struct proto_ops inet_stream_ops = {

.family = PF_INET,

.owner = THIS_MODULE,

.release = inet_release,

.bind = inet_bind,

.connect = inet_stream_connect,

.socketpair = sock_no_socketpair,

.accept = inet_accept,

.getname = inet_getname,

.poll = tcp_poll,

.ioctl = inet_ioctl,

.listen = inet_listen,

.shutdown = inet_shutdown,

.setsockopt = sock_common_setsockopt,

.getsockopt = sock_common_getsockopt,

.sendmsg = inet_sendmsg,

.recvmsg = sock_common_recvmsg,

.mmap = sock_no_mmap,

.sendpage = tcp_sendpage

};

//tcp协议

/* TCP receive and error handlers; added by inet_init() under IPPROTO_TCP. */
static struct net_protocol tcp_protocol = {
	.handler	= tcp_v4_rcv,
	.err_handler	= tcp_v4_err,
	.no_policy	= 1,
};

/* UDP receive and error handlers; added by inet_init() under IPPROTO_UDP. */
static struct net_protocol udp_protocol = {
	.handler	= udp_rcv,
	.err_handler	= udp_err,
	.no_policy	= 1,
};

/* ICMP receive handler; added by inet_init() under IPPROTO_ICMP. */
static struct net_protocol icmp_protocol = {
	.handler	= icmp_rcv,
};

//net/ipv4/tcp_ipv4.c

//tcp协议的操作函数

/* Protocol operations and accounting hooks for TCP. */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= tcp_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= tcp_v4_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v4_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.slab_obj_size		= sizeof(struct tcp_sock),
};

sock_register()函数:

//注册协议簇

/*
 * Register a protocol family with the socket layer.
 *
 * Stores @ops in net_families[ops->family] if that slot is still free.
 * Returns 0 on success, -EEXIST if the slot is already taken, or
 * -ENOBUFS if ops->family is not below NPROTO.
 *
 * NOTE(review): the "Registered protocol family" message is printed
 * even when -EEXIST is returned; confirm whether that is intended.
 */
int sock_register(struct net_proto_family *ops)
{
	int err;

	if (ops->family >= NPROTO) {
		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
		return -ENOBUFS;
	}

	net_family_write_lock();
	if (net_families[ops->family] != NULL) {
		err = -EEXIST;
	} else {
		net_families[ops->family] = ops;
		err = 0;
	}
	net_family_write_unlock();

	printk(KERN_INFO "NET: Registered protocol family %d\n",
	       ops->family);
	return err;
}

inet_create()函数

//创建一个INET套接字

/*
 * Create an INET socket.
 *
 * Looks up the inet_protosw entry matching sock->type and @protocol,
 * installs its operations on @sock, allocates the struct sock and
 * initialises it.
 *
 * Returns 0 on success, or:
 *   -ESOCKTNOSUPPORT  no matching type/protocol entry,
 *   -EPERM            the entry needs a capability the caller lacks,
 *   -EPROTONOSUPPORT  the resolved protocol number is 0,
 *   -ENOBUFS          the sock could not be allocated,
 *   or the error returned by the protocol's init() hook.
 */
static int inet_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	struct list_head *pos;
	struct inet_protosw *answer;
	struct inet_opt *inet;
	struct proto *answer_prot;
	unsigned char answer_flags;
	char answer_no_check;
	int err;

	sock->state = SS_UNCONNECTED;

	/* Look for the requested type/protocol pair. */
	answer = NULL;
	rcu_read_lock();
	list_for_each_rcu(pos, &inetsw[sock->type]) {
		answer = list_entry(pos, struct inet_protosw, list);

		/* Check the non-wild match. */
		if (protocol == answer->protocol) {
			if (protocol != IPPROTO_IP)
				break;
		} else {
			/* Check for the two wild cases. */
			if (IPPROTO_IP == protocol) {
				protocol = answer->protocol;
				break;
			}
			if (IPPROTO_IP == answer->protocol)
				break;
		}
		answer = NULL;
	}

	if (!answer) {
		err = -ESOCKTNOSUPPORT;
		goto out_rcu_unlock;
	}
	if (answer->capability > 0 && !capable(answer->capability)) {
		err = -EPERM;
		goto out_rcu_unlock;
	}
	if (!protocol) {
		err = -EPROTONOSUPPORT;
		goto out_rcu_unlock;
	}

	/* Operations of the BSD socket; copy what is needed before unlocking. */
	sock->ops = answer->ops;
	answer_prot = answer->prot;
	answer_no_check = answer->no_check;
	answer_flags = answer->flags;
	rcu_read_unlock();

	BUG_TRAP(answer_prot->slab != NULL);

	sk = sk_alloc(PF_INET, GFP_KERNEL,
		      answer_prot->slab_obj_size,
		      answer_prot->slab);
	if (!sk) {
		err = -ENOBUFS;
		goto out;
	}
	err = 0;

	/* Operations of the protocol-specific socket. */
	sk->sk_prot = answer_prot;
	sk->sk_no_check = answer_no_check;
	if (answer_flags & INET_PROTOSW_REUSE)
		sk->sk_reuse = 1;

	inet = inet_sk(sk);

	if (sock->type == SOCK_RAW) {
		inet->num = protocol;
		if (protocol == IPPROTO_RAW)
			inet->hdrincl = 1;
	}

	inet->pmtudisc = ipv4_config.no_pmtu_disc ? IP_PMTUDISC_DONT
						  : IP_PMTUDISC_WANT;

	inet->id = 0;

	/* Associate sock with sk. */
	sock_init_data(sock, sk);
	sk_set_owner(sk, sk->sk_prot->owner);

	sk->sk_destruct = inet_sock_destruct;
	sk->sk_family = PF_INET;
	sk->sk_protocol = protocol;
	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;

	inet->uc_ttl = -1;
	inet->mc_loop = 1;
	inet->mc_ttl = 1;
	inet->mc_index = 0;
	inet->mc_list = NULL;

#ifdef INET_REFCNT_DEBUG
	atomic_inc(&inet_sock_nr);
#endif

	if (inet->num) {
		/* It assumes that any protocol which allows
		 * the user to assign a number at socket
		 * creation time automatically
		 * shares.
		 */
		inet->sport = htons(inet->num);
		/* Add to protocol hash chains. */
		sk->sk_prot->hash(sk);
	}

	/* Run the protocol's init hook, if it has one. */
	if (sk->sk_prot->init) {
		err = sk->sk_prot->init(sk);
		if (err)
			sk_common_release(sk);
	}
out:
	return err;
out_rcu_unlock:
	rcu_read_unlock();
	goto out;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息