您的位置:首页 > 运维架构 > Linux

【Linux4.1.12源码分析】AF_INET raw socket实现原理分析

2016-10-26 22:19 706 查看
在分析AF_PACKET raw socket的实现时,我们是从创建socket入手的;本篇改为从收包流程入手。在分析协议栈IP层的报文接收流程时,我们已经知道,IP层把报文交给raw socket的入口函数是raw_local_deliver。下面就从这个函数开始,看看一个报文是如何被提交给raw socket的。

1、raw_local_deliver函数

/*
 * Entry point through which the IP layer offers a packet to raw sockets.
 *
 * @skb:      the received packet
 * @protocol: protocol number taken from the IP header (per the caller's
 *            contract described in the surrounding article)
 *
 * Returns 1 when the bucket for @protocol holds a socket and raw_v4_input()
 * reported a delivery, 0 otherwise.
 */
int raw_local_deliver(struct sk_buff *skb, int protocol)
{
	/* Bucket index: the protocol number masked to the table size. */
	int bucket = protocol & (RAW_HTABLE_SIZE - 1);

	/*
	 * If there may be a raw socket we must check - if the bucket is
	 * empty there is nothing to do and the packet is left untouched.
	 */
	if (!sk_head(&raw_v4_hashinfo.ht[bucket]))
		return 0;

	/* Collapse raw_v4_input()'s result to a strict 0/1. */
	return raw_v4_input(skb, ip_hdr(skb), bucket) != 0;
}
2、raw_v4_input函数

/*
 * Hand a clone of @skb to every raw socket in bucket @hash that
 * __raw_v4_lookup() reports as matching this packet.
 *
 * @skb:  the received packet (never consumed here; only clones are queued)
 * @iph:  IP header of @skb
 * @hash: index into raw_v4_hashinfo.ht
 *
 * Returns 1 if at least one socket matched - even when that socket's
 * filters then rejected the packet or the clone allocation failed -
 * and 0 if no socket matched or the bucket was empty.
 *
 * The whole walk runs under the raw_v4_hashinfo read lock.
 */
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
{
	struct hlist_head *bucket;
	struct sock *cur;
	struct net *ns;
	int matched = 0;

	read_lock(&raw_v4_hashinfo.lock);

	bucket = &raw_v4_hashinfo.ht[hash];
	if (!hlist_empty(bucket)) {
		ns = dev_net(skb->dev);

		/*
		 * __raw_v4_lookup() is defined elsewhere; presumably it scans
		 * forward from the node it is given and returns the next socket
		 * matching protocol/addresses/interface, or NULL - TODO confirm.
		 * Each iteration resumes the search after the current socket.
		 */
		for (cur = __raw_v4_lookup(ns, __sk_head(bucket), iph->protocol,
					   iph->saddr, iph->daddr,
					   skb->dev->ifindex);
		     cur;
		     cur = __raw_v4_lookup(ns, sk_next(cur), iph->protocol,
					   iph->saddr, iph->daddr,
					   skb->dev->ifindex)) {
			struct sk_buff *clone;

			matched = 1;

			/* For ICMP packets, a non-zero icmp_filter() result
			 * suppresses delivery to this socket.
			 */
			if (iph->protocol == IPPROTO_ICMP && icmp_filter(cur, skb))
				continue;

			/* A zero ip_mc_sf_allow() result suppresses delivery
			 * (the multicast source-filter check, per the original
			 * author's note).
			 */
			if (!ip_mc_sf_allow(cur, iph->daddr, iph->saddr,
					    skb->dev->ifindex))
				continue;

			/* Each receiver gets its own clone of the packet. */
			clone = skb_clone(skb, GFP_ATOMIC);

			/* Not releasing hash table! */
			if (clone)
				raw_rcv(cur, clone);
		}
	}

	read_unlock(&raw_v4_hashinfo.lock);
	return matched;
}
3、raw_rcv函数

/*
 * Deliver one packet to a single raw socket.
 *
 * @sk:  destination raw socket
 * @skb: packet to deliver; this function takes ownership of it
 *
 * Returns NET_RX_DROP when the xfrm inbound policy check rejects the
 * packet (the drop counter is bumped and the skb freed). Otherwise
 * returns 0; the result of raw_rcv_skb() is deliberately not propagated.
 */
int raw_rcv(struct sock *sk, struct sk_buff *skb)
{
	/* xfrm (IPsec) inbound policy check. */
	if (xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
		nf_reset(skb);

		/*
		 * Move the data pointer back to the network header so the
		 * payload handed to the socket starts at the IP header.
		 */
		skb_push(skb, skb->data - skb_network_header(skb));

		/* Queue the packet on the socket. */
		raw_rcv_skb(sk, skb);
		return 0;
	}

	/* Policy rejected the packet: account for the drop and free it. */
	atomic_inc(&sk->sk_drops);
	kfree_skb(skb);
	return NET_RX_DROP;
}
4、raw_rcv_skb函数

/*
 * Queue @skb on @sk's receive queue.
 *
 * Returns NET_RX_SUCCESS when sock_queue_rcv_skb() accepts the packet;
 * otherwise frees @skb and returns NET_RX_DROP.
 */
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int rc;

	/* Charge it to the socket. */
	ipv4_pktinfo_prepare(sk, skb);

	/* Per the original author's note, a successful enqueue also wakes
	 * up any process waiting on the socket.
	 */
	rc = sock_queue_rcv_skb(sk, skb);
	if (rc >= 0)
		return NET_RX_SUCCESS;

	kfree_skb(skb);
	return NET_RX_DROP;
}
到这里我们知道,把报文提交给raw socket的关键是全局变量raw_v4_hashinfo,因此接下来要确认:创建AF_INET raw socket时,sock对象是否被保存到了该全局变量中。socket创建流程请参考AF_PACKET raw socket分析。AF_INET raw socket的create函数是inet_create,我们来看一看该函数的实现。

inet_create函数

/*
 * Create the AF_INET sock backing @sock.
 *
 * Steps, in order:
 *   1. Search inetsw[sock->type] for an entry matching @protocol, treating
 *      IPPROTO_IP on either side as a wildcard. On failure, call
 *      request_module() up to twice (a type-specific alias first, then a
 *      generic one) and retry the lookup.
 *   2. For SOCK_RAW sockets not created by the kernel (@kern == 0), require
 *      CAP_NET_RAW in the network namespace's user namespace.
 *   3. Allocate the sock with sk_alloc() and initialise its inet fields.
 *   4. If inet_num is non-zero, add the sock to the protocol's hash table
 *      via sk->sk_prot->hash().
 *   5. Run the protocol's init hook, if it has one.
 *
 * @net:      network namespace the socket belongs to
 * @sock:     socket being created; sock->type selects the inetsw list
 * @protocol: requested protocol; may be replaced by the matched entry's
 *            protocol when IPPROTO_IP was requested
 * @kern:     non-zero skips the CAP_NET_RAW check for SOCK_RAW
 *
 * Returns 0 on success, or a negative errno: -ESOCKTNOSUPPORT (nothing is
 * registered for sock->type), -EPROTONOSUPPORT (no entry matches @protocol),
 * -EPERM (raw socket without CAP_NET_RAW), -ENOBUFS (sk_alloc() failed), or
 * whatever the protocol's init hook returned (the sock is released via
 * sk_common_release() in that case).
 */
static int inet_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct sock *sk;
struct inet_protosw *answer;
struct inet_sock *inet;
struct proto *answer_prot;
unsigned char answer_flags;
int try_loading_module = 0;
int err;

sock->state = SS_UNCONNECTED;

/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
rcu_read_lock();
list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {	/* find the matching inet_protosw entry in inetsw */

err = 0;
/* Check the non-wild match. */
if (protocol == answer->protocol) {
if (protocol != IPPROTO_IP)
break;
} else {
/* Check for the two wild cases. */
if (IPPROTO_IP == protocol) {
protocol = answer->protocol;
break;
}
if (IPPROTO_IP == answer->protocol)
break;
}
err = -EPROTONOSUPPORT;
}

if (unlikely(err)) {
if (try_loading_module < 2) {
rcu_read_unlock();
/*
* Be more specific, e.g. net-pf-2-proto-132-type-1
* (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
*/
if (++try_loading_module == 1)
request_module("net-pf-%d-proto-%d-type-%d",
PF_INET, protocol, sock->type);
/*
* Fall back to generic, e.g. net-pf-2-proto-132
* (net-pf-PF_INET-proto-IPPROTO_SCTP)
*/
else
request_module("net-pf-%d-proto-%d",
PF_INET, protocol);
goto lookup_protocol;
} else
goto out_rcu_unlock;
}

/* Raw sockets requested from user space need CAP_NET_RAW. */
err = -EPERM;
if (sock->type == SOCK_RAW && !kern &&
!ns_capable(net->user_ns, CAP_NET_RAW))
goto out_rcu_unlock;

/* Copy what we need out of the entry before leaving the RCU section. */
sock->ops = answer->ops;
answer_prot = answer->prot;		/* for a raw socket this is raw_prot (per the article author) */
answer_flags = answer->flags;
rcu_read_unlock();

WARN_ON(!answer_prot->slab);

err = -ENOBUFS;
sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);	/* allocate the sock object */
if (!sk)
goto out;

err = 0;
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;

inet = inet_sk(sk);
inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;

inet->nodefrag = 0;

if (SOCK_RAW == sock->type) {		/* raw socket */
inet->inet_num = protocol;	/* for a raw socket, inet_num is set to the protocol value */
if (IPPROTO_RAW == protocol)
inet->hdrincl = 1;
}

if (net->ipv4.sysctl_ip_no_pmtu_disc)
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;

inet->inet_id = 0;

sock_init_data(sock, sk);	/* initialise the sock object */

sk->sk_destruct	   = inet_sock_destruct;
sk->sk_protocol	   = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;

inet->uc_ttl	= -1;
inet->mc_loop	= 1;
inet->mc_ttl	= 1;
inet->mc_all	= 1;
inet->mc_index	= 0;
inet->mc_list	= NULL;
inet->rcv_tos	= 0;

sk_refcnt_debug_inc(sk);

if (inet->inet_num) {		/* for a raw socket inet_num was set to protocol above, so a non-zero protocol takes this branch */
/* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares.
*/
inet->inet_sport = htons(inet->inet_num);
/* Add to protocol hash chains. */
sk->sk_prot->hash(sk);	/* insert into the hash table; for a raw socket this hook is raw_hash_sk (per the article author) */
}

if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);	/* for a raw socket this hook is raw_init (per the article author) */
if (err)
sk_common_release(sk);
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
}
raw_hash_sk函数

/*
 * Add a raw socket to its protocol's hash table.
 *
 * @sk: socket to insert; the table is taken from sk->sk_prot->h.raw_hash
 *      (raw_v4_hashinfo for AF_INET raw sockets, per the article author -
 *      the same global the receive path reads).
 *
 * The insertion and the in-use counter update are done under the table's
 * write lock, taken with the _bh variant.
 */
void raw_hash_sk(struct sock *sk)
{
	struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
	struct hlist_head *head;

	/* Pick the bucket from inet_num, masked to the table size. */
	head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);		/* link the sock into the bucket */
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);
}
raw_init函数

/*
 * Protocol init hook for raw sockets.
 *
 * When the socket's inet_num is IPPROTO_ICMP, the raw_sock filter is
 * cleared to all zeroes; other protocols need no setup here.
 *
 * Always returns 0.
 */
static int raw_init(struct sock *sk)
{
	struct raw_sock *raw = raw_sk(sk);
	const int is_icmp = inet_sk(sk)->inet_num == IPPROTO_ICMP;

	if (is_icmp) {
		memset(&raw->filter, 0, sizeof(raw->filter));
	}

	return 0;
}


综合上述分析:在AF_INET raw socket的创建过程中,会把sock对象注册到raw_v4_hashinfo中(挂到对应protocol的hash链表上);在IP层的收包阶段,会根据报文IP头中的协议号到raw_v4_hashinfo中查找是否有对应的socket,如果有则把报文提交给该socket处理。还有一个疑问没有搞明白:AF_INET raw socket不能接收所有协议类型的IP报文,只能选择其中一种protocol,这样设计应该是有原因的,暂时还回答不了,先留着。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息