您的位置:首页 > 运维架构 > Linux

Linux内核收包精髓

2015-11-08 01:50 831 查看
网卡和磁盘是现代服务器中性能要求最为苛刻的两个外设。下面通过内核收包路径上的三个函数(netif_rx、deliver_to_old_ones 和 net_rx_action)来看一下Linux内核是如何处理高性能网卡的。
/*
 * netif_rx - append a received buffer to the current CPU's input queue.
 *
 * Stamps the skb if it has no timestamp yet, then, with local interrupts
 * disabled, either queues it on softnet_data[cpu].input_pkt_queue and
 * raises NET_RX_SOFTIRQ, or drops it.
 *
 * Three cases are distinguished by the current queue length:
 *   - longer than netdev_max_backlog: enter the throttled state (if not
 *     already in it) and drop;
 *   - non-empty but within the limit: drop while throttled, otherwise
 *     enqueue;
 *   - empty: leave the throttled state (if in it) and enqueue.
 *
 * Returns softnet_data[cpu].cng_level when the skb was queued, or
 * NET_RX_DROP when it was dropped; a dropped skb is released with
 * kfree_skb() before returning.
 */
int netif_rx(struct sk_buff *skb)
{
	int this_cpu = smp_processor_id();
	struct softnet_data *queue = &softnet_data[this_cpu];
	unsigned long flags;

	if (!skb->stamp.tv_sec)
		do_gettimeofday(&skb->stamp);

	local_irq_save(flags);
	netdev_rx_stat[this_cpu].total++;

	if (queue->input_pkt_queue.qlen > netdev_max_backlog) {
		/* Backlog overflow: switch throttling on once, then drop. */
		if (!queue->throttle) {
			queue->throttle = 1;
			netdev_rx_stat[this_cpu].throttled++;
#ifdef CONFIG_NET_HW_FLOWCONTROL
			atomic_inc(&netdev_dropping);
#endif
		}
		goto drop;
	}

	if (queue->input_pkt_queue.qlen) {
		/* Queue still has packets pending: keep dropping while throttled. */
		if (queue->throttle)
			goto drop;
	} else if (queue->throttle) {
		/* Queue is empty: leave the throttled state before enqueueing. */
		queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
		if (atomic_dec_and_test(&netdev_dropping))
			netdev_wakeup();
#endif
	}

	/* Take a device reference for as long as the skb sits on the queue. */
	dev_hold(skb->dev);
	__skb_queue_tail(&queue->input_pkt_queue, skb);
	/* Runs from irqs or BH's, no need to wake BH */
	cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
	local_irq_restore(flags);
#ifndef OFFLINE_SAMPLE
	get_sample_stats(this_cpu);
#endif
	return softnet_data[this_cpu].cng_level;

drop:
	netdev_rx_stat[this_cpu].dropped++;
	local_irq_restore(flags);
	kfree_skb(skb);
	return NET_RX_DROP;
}
/*
 * Deliver an skb to an old protocol handler, i.e. one which is not
 * threaded well or which does not understand shared skbs.
 *
 * pt:   packet_type whose ->func is invoked.
 * skb:  buffer to deliver.
 * last: when zero the handler receives a clone made with skb_clone() and
 *       the caller's skb is not passed on; when non-zero the caller's skb
 *       itself is passed to the handler.
 *
 * A non-linear skb is linearized first. If cloning or linearizing fails
 * the function returns NET_RX_DROP (after freeing the skb in the
 * linearize case); otherwise it returns whatever pt->func returned.
 */
static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
{
	static spinlock_t old_proto_lock = SPIN_LOCK_UNLOCKED;
	int verdict;

	if (!last) {
		skb = skb_clone(skb, GFP_ATOMIC);
		if (!skb)
			return NET_RX_DROP;
	}

	if (skb_is_nonlinear(skb)) {
		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
			kfree_skb(skb);
			return NET_RX_DROP;
		}
	}

	/*
	 * The assumption (a correct one) is that old protocols did not
	 * depend on any BHs other than NET_BH and TIMER_BH.
	 */

	/* A dedicated spinlock stands in for NET_BH. */
	spin_lock(&old_proto_lock);

	/* Disable timers and wait for all timers to complete. */
	tasklet_disable(&bh_task_vec[TIMER_BH]);
	verdict = pt->func(skb, skb->dev, pt);
	tasklet_hi_enable(&bh_task_vec[TIMER_BH]);

	spin_unlock(&old_proto_lock);

	return verdict;
}

/*
 * net_rx_action - drain the current CPU's input packet queue.
 *
 * Dequeues skbs from softnet_data[cpu].input_pkt_queue one at a time and
 * hands each to the matching packet_type handlers (first the ptype_all
 * list, then the ptype_base hash bucket for skb->protocol). The loop ends
 * when the queue is empty, or bails out to softnet_break when the packet
 * budget is used up or more than one jiffy has elapsed; in the bail-out
 * case NET_RX_SOFTIRQ is raised again so the remainder is processed later.
 *
 * h is not used in this function.
 * NOTE(review): presumably this is the registered NET_RX_SOFTIRQ handler;
 * the registration is not visible here - confirm.
 */
static void net_rx_action(struct softirq_action *h)
{
int this_cpu = smp_processor_id();
struct softnet_data *queue = &softnet_data[this_cpu];
unsigned long start_time = jiffies;
/* "bugdet" (sic, i.e. budget): per-invocation packet allowance. */
int bugdet = netdev_max_backlog;

/* Held for reading across the whole delivery loop. */
br_read_lock(BR_NETPROTO_LOCK);

for (;;) {
struct sk_buff *skb;
struct net_device *rx_dev;

/* netif_rx() appends to this queue with local irqs off, so dequeue
   with them off as well. */
local_irq_disable();
skb = __skb_dequeue(&queue->input_pkt_queue);
local_irq_enable();

/* Queue empty: normal exit from the loop. */
if (skb == NULL)
break;

skb_bond(skb);

/* Remember the device now; the dev_put() calls below use rx_dev rather
   than skb->dev after the skb has been handed off. */
rx_dev = skb->dev;

#ifdef CONFIG_NET_FASTROUTE
/* Fast-routed packets are transmitted directly and skip protocol
   delivery. */
if (skb->pkt_type == PACKET_FASTROUTE) {
netdev_rx_stat[this_cpu].fastroute_deferred_out++;
dev_queue_xmit(skb);
/* Drops the reference netif_rx() took with dev_hold(). */
dev_put(rx_dev);
continue;
}
#endif
/* Both header pointers start at the beginning of the data. */
skb->h.raw = skb->nh.raw = skb->data;
{
struct packet_type *ptype, *pt_prev;
unsigned short type = skb->protocol;

/*
 * Delivery lags one handler behind the scan: each matching handler is
 * remembered in pt_prev and only invoked once the next match is found.
 * Intermediate deliveries take an extra skb reference (or a clone for
 * old-style handlers); the final pt_prev is delivered after both scans
 * without one.
 */
pt_prev = NULL;
/* Pass 1: handlers on ptype_all, filtered only by device. */
for (ptype = ptype_all; ptype; ptype = ptype->next) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev) {
/* A NULL ->data selects the old-protocol delivery path. */
if (!pt_prev->data) {
deliver_to_old_ones(pt_prev, skb, 0);
} else {
atomic_inc(&skb->users);
pt_prev->func(skb,
skb->dev,
pt_prev);
}
}
pt_prev = ptype;
}
}

#ifdef CONFIG_NET_DIVERT
if (skb->dev->divert && skb->dev->divert->divert)
handle_diverter(skb);
#endif /* CONFIG_NET_DIVERT */

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
/* Frames arriving on a bridge port go to handle_bridge() together with
   the pending pt_prev; the ptype_base scan below is skipped for them. */
if (skb->dev->br_port != NULL &&
br_handle_frame_hook != NULL) {
handle_bridge(skb, pt_prev);
dev_put(rx_dev);
continue;
}
#endif

/* Pass 2: protocol-specific handlers; the bucket is chosen by the low
   4 bits of the host-order protocol number. */
for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev) {
if (!pt_prev->data)
deliver_to_old_ones(pt_prev, skb, 0);
else {
atomic_inc(&skb->users);
pt_prev->func(skb,
skb->dev,
pt_prev);
}
}
pt_prev = ptype;
}
}

/* Final delivery: the last matching handler gets the skb itself
   (last == 1 / no extra reference); with no match at all it is freed. */
if (pt_prev) {
if (!pt_prev->data)
deliver_to_old_ones(pt_prev, skb, 1);
else
pt_prev->func(skb, skb->dev, pt_prev);
} else
kfree_skb(skb);
}

/* Drops the reference netif_rx() took with dev_hold(). */
dev_put(rx_dev);

/* Stop when the budget has run out or more than one jiffy has passed
   since entry. */
if (bugdet-- < 0 || jiffies - start_time > 1)
goto softnet_break;

#ifdef CONFIG_NET_HW_FLOWCONTROL
/* Throttled and the queue has drained below no_cong_thresh. */
/* NOTE(review): netdev_dropping is decremented here even when the result
   is non-zero, while queue->throttle stays set, so a later iteration can
   decrement it again - confirm this is intended. */
if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
if (atomic_dec_and_test(&netdev_dropping)) {
queue->throttle = 0;
netdev_wakeup();
goto softnet_break;
}
}
#endif

}
br_read_unlock(BR_NETPROTO_LOCK);

/* Queue fully drained: clear the throttled state with irqs off. */
local_irq_disable();
if (queue->throttle) {
queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
if (atomic_dec_and_test(&netdev_dropping))
netdev_wakeup();
#endif
}
local_irq_enable();

/* NOTE(review): no matching profile-enter call is visible in this
   function - confirm. */
NET_PROFILE_LEAVE(softnet_process);
return;

/* Early exit: work may remain on the queue, so count the squeeze and
   raise the softirq again. */
softnet_break:
br_read_unlock(BR_NETPROTO_LOCK);

local_irq_disable();
netdev_rx_stat[this_cpu].time_squeeze++;
/* This already runs in BH context, no need to wake up BH's */
cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
local_irq_enable();

NET_PROFILE_LEAVE(softnet_process);
return;
}


看懂了吧?

可以看到,Linux内核在网卡异步处理机制上比传统的BSD激进得多,性能也优越得多,不过稳定性也随之有所下降。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: