
Linux 2.6.30 kernel netfilter: an analysis of the IPv4 packet transmission path

2009-09-10 22:20
Please credit the author when reposting. Author: Alanx  Email: zhangsuozhu@tom.com  QQ: 8540426

http://hi.baidu.com/alanx/
When the kernel is ready to transmit a packet it calls the function below. The most important structure involved is struct sk_buff; see the related documentation for the details.

int ip_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;	/* the device the packet will leave on */

	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);	/* update the SNMP/MIB output counters */

	skb->dev = dev;				/* record the output device */
	skb->protocol = htons(ETH_P_IP);	/* mark the packet as IP */

	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}
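For context, ip_output() is normally reached through dst_output(), which simply invokes the output method stored in the packet's routing entry. From memory (include/net/dst.h, abbreviated; check your own tree for the exact form), it looks roughly like this:

static inline int dst_output(struct sk_buff *skb)
{
	/* For ordinary unicast IPv4 routes, skb_dst(skb)->output points at ip_output(). */
	return skb_dst(skb)->output(skb);
}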

The return statement of ip_output() is the hook invocation, the famous hook of the netfilter mechanism. Don't worry, let's keep reading.

NF_HOOK_COND() is a macro, defined as follows:

#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond)			\
({int __ret;									\
if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
	__ret = (okfn)(skb);							\
__ret;})
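For comparison, the unconditional NF_HOOK() macro used on most other paths is, if I recall the same header correctly, just this conditional variant with cond fixed to 1 (roughly, from include/linux/netfilter.h; check your tree):

#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)		\
({int __ret;									\
if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\
	__ret = (okfn)(skb);							\
__ret;})

#define NF_HOOK(pf, hook, skb, indev, outdev, okfn)				\
	NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)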

Looking further down, at nf_hook_thresh():

static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
				 struct sk_buff *skb,
				 struct net_device *indev,
				 struct net_device *outdev,
				 int (*okfn)(struct sk_buff *), int thresh,
				 int cond)
{
	if (!cond)
		return 1;
#ifndef CONFIG_NETFILTER_DEBUG
	if (list_empty(&nf_hooks[pf][hook]))
		return 1;
#endif
	return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
}

Things are starting to make sense. When cond is zero, or when the nf_hooks[pf][hook] list is empty, nf_hook_thresh() returns 1. In that case NF_HOOK_COND() calls the function pointer (okfn)(skb) directly.
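To make the control flow concrete, here is a hand expansion of the NF_HOOK_COND() call made in ip_output() (purely illustrative, not actual kernel source):

/* Hand expansion of the NF_HOOK_COND() call in ip_output(); illustrative only. */
int __ret;
if ((__ret = nf_hook_thresh(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip_finish_output, INT_MIN,
			    !(IPCB(skb)->flags & IPSKB_REROUTED))) == 1)
	__ret = ip_finish_output(skb);	/* no hooks intervened (or cond was 0): send directly */
return __ret;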

From the arguments passed in we know that this is in fact a call to ip_finish_output(skb). We will come back to ip_finish_output() at the end; first let's look at cond and the nf_hooks[pf][hook] list.

cond:

From the argument we passed in, this is !(IPCB(skb)->flags & IPSKB_REROUTED). I haven't dug into it in detail, but the IPSKB_REROUTED flag marks a packet that has already been re-routed; for such a packet cond is 0, so the POST_ROUTING hooks are skipped and ip_finish_output() is called directly. Interested readers can dig deeper.

nf_hooks[pf][hook]:

From the arguments passed down from above, this is nf_hooks[PF_INET][NF_INET_POST_ROUTING]. Its declaration is:

struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;

So it is simply a two-dimensional array of list heads. The entry we use here, nf_hooks[PF_INET][NF_INET_POST_ROUTING], is the list of hook functions for the PF_INET family at the post-routing point of the transmit path. When we use iptables or tc, the kernel-side modules behind those tools register many nodes on this list, as sketched below.
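As a concrete illustration of how a node ends up on this list, here is a minimal sketch of a kernel module that registers a POST_ROUTING hook through nf_register_hook(). The function and variable names (my_post_routing_hook, my_ops) are made up for illustration; only the netfilter API itself comes from the kernel headers:

#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

/* A hook with the nf_hookfn signature shown later in this article. */
static unsigned int my_post_routing_hook(unsigned int hooknum,
					 struct sk_buff *skb,
					 const struct net_device *in,
					 const struct net_device *out,
					 int (*okfn)(struct sk_buff *))
{
	/* Inspect or modify skb here; NF_ACCEPT lets nf_iterate() move on
	 * to the next node on the list. */
	return NF_ACCEPT;
}

static struct nf_hook_ops my_ops = {
	.hook     = my_post_routing_hook,
	.owner    = THIS_MODULE,
	.pf       = PF_INET,
	.hooknum  = NF_INET_POST_ROUTING,
	.priority = NF_IP_PRI_FILTER,	/* 0: same band as the filter table */
};

static int __init my_init(void)
{
	/* Links my_ops.list into nf_hooks[PF_INET][NF_INET_POST_ROUTING]. */
	return nf_register_hook(&my_ops);
}

static void __exit my_exit(void)
{
	nf_unregister_hook(&my_ops);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");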

What is this list used for? Let's continue with nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh). Here is its definition:

int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
		 struct net_device *indev,
		 struct net_device *outdev,
		 int (*okfn)(struct sk_buff *),
		 int hook_thresh)
{
	struct list_head *elem;
	unsigned int verdict;
	int ret = 0;

	/* We may already have this, but read-locks nest anyway */
	rcu_read_lock();

	elem = &nf_hooks[pf][hook];
next_hook:
	verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
			     outdev, &elem, okfn, hook_thresh);
	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
		ret = 1;
	} else if (verdict == NF_DROP) {
		kfree_skb(skb);
		ret = -EPERM;
	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
		if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
			      verdict >> NF_VERDICT_BITS))
			goto next_hook;
	}
	rcu_read_unlock();
	return ret;
}

The key statement is:

verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
		     outdev, &elem, okfn, hook_thresh);

Based on the verdict it returns, the packet is either allowed through (NF_ACCEPT or NF_STOP), dropped (NF_DROP), or queued to a user-space queue handler (NF_QUEUE). Note the goto next_hook: if nf_queue() fails to queue the packet, iteration resumes over the remaining hooks in a loop.
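For reference, the verdict values come from include/linux/netfilter.h; from memory they are (abbreviated, check your tree for the exact definitions):

/* Responses from hook functions (include/linux/netfilter.h, abbreviated). */
#define NF_DROP   0	/* drop the packet; nf_hook_slow() frees the skb   */
#define NF_ACCEPT 1	/* let the packet continue                         */
#define NF_STOLEN 2	/* the hook took ownership of the skb              */
#define NF_QUEUE  3	/* queue the skb to a user-space queue handler     */
#define NF_REPEAT 4	/* call the same hook again                        */
#define NF_STOP   5	/* accept, but stop traversing further hooks       */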

Next, let's look at the key function itself, nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, outdev, &elem, okfn, hook_thresh):

unsigned int nf_iterate(struct list_head *head,
			struct sk_buff *skb,
			unsigned int hook,
			const struct net_device *indev,
			const struct net_device *outdev,
			struct list_head **i,
			int (*okfn)(struct sk_buff *),
			int hook_thresh)
{
	unsigned int verdict;

	/*
	 * The caller must not block between calls to this
	 * function because of risk of continuing from deleted element.
	 */
	list_for_each_continue_rcu(*i, head) {
		struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

		if (hook_thresh > elem->priority)
			continue;

		/* Optimization: we don't need to hold module
		   reference here, since function can't sleep. --RR */
		verdict = elem->hook(hook, skb, indev, outdev, okfn);
		if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
			if (unlikely((verdict & NF_VERDICT_MASK)
							> NF_MAX_VERDICT)) {
				NFDEBUG("Evil return from %p(%u).\n",
					elem->hook, hook);
				continue;
			}
#endif
			if (verdict != NF_REPEAT)
				return verdict;
			*i = (*i)->prev;
		}
	}
	return NF_ACCEPT;
}

A few points:

1. list_for_each_continue_rcu(*i, head) iterates over head, i.e. the nf_hooks[PF_INET][NF_INET_POST_ROUTING] list mentioned above.

2. struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; retrieves a node from that list.

3. if (hook_thresh > elem->priority) continue; skips nodes whose priority is below the threshold. Here hook_thresh is INT_MIN, so no node is skipped.

4. verdict = elem->hook(hook, skb, indev, outdev, okfn); finally calls the hook itself. Hooray, the most exciting part. As noted above, these hooks are the nodes that the kernel-side modules behind iptables or tc registered on nf_hooks. If a hook returns NF_REPEAT, *i = (*i)->prev; rewinds the iterator so that the same hook is called again.

Let's look at the structure of the nodes on nf_hooks.

typedef unsigned int nf_hookfn(unsigned int hooknum,
			       struct sk_buff *skb,
			       const struct net_device *in,
			       const struct net_device *out,
			       int (*okfn)(struct sk_buff *));

struct nf_hook_ops
{
	struct list_head list;	/* list node linked into nf_hooks[pf][hooknum] */

	/* User fills in from here down. */
	nf_hookfn *hook;	/* the hook function called above */
	struct module *owner;	/* owning module */
	u_int8_t pf;		/* protocol family, e.g. PF_INET */
	unsigned int hooknum;	/* hook point, e.g. NF_INET_POST_ROUTING */
	/* Hooks are ordered in ascending priority. */
	int priority;		/* priority, e.g. INT_MIN */
};
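The priority field usually takes one of the standard IPv4 values from include/linux/netfilter_ipv4.h. From memory (abbreviated; consult your tree for the exact list), the ordering is roughly:

enum nf_ip_hook_priorities {
	NF_IP_PRI_FIRST = INT_MIN,
	NF_IP_PRI_CONNTRACK_DEFRAG = -400,
	NF_IP_PRI_RAW = -300,
	NF_IP_PRI_CONNTRACK = -200,
	NF_IP_PRI_MANGLE = -150,
	NF_IP_PRI_NAT_DST = -100,
	NF_IP_PRI_FILTER = 0,
	NF_IP_PRI_SECURITY = 50,
	NF_IP_PRI_NAT_SRC = 100,
	NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX,
	NF_IP_PRI_LAST = INT_MAX,
};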



Once all of the above has run, the packet has either been dropped, or it is handed to the ip_finish_output() we started with, which actually sends it out. That's all for now!