【Linux4.1.12源码分析】IP层报文发送之ip_output
2016-09-26 22:01
615 查看
上一篇提到ip_local_out函数最终会调用ip_output完成报文发送,本篇分析ip_output的处理过程。
1、ip_output函数
1、ip_output函数
int ip_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); //设置报文协议为IPV4 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL, dev, ip_finish_output, //报文发送netfilter处理,如果允许则调用ip_finish_output !(IPCB(skb)->flags & IPSKB_REROUTED)); }2、ip_finish_output函数
static int ip_finish_output(struct sock *sk, struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { //仅经过ip_forward流程处理的报文携带该对象 IPCB(skb)->flags |= IPSKB_REROUTED; //该flag会影响后续报文的GSO处理 return dst_output_sk(sk, skb); //由于SNAT等策略处理,需要再次调用xfrm4_output函数来发包 } #endif if (skb_is_gso(skb)) return ip_finish_output_gso(sk, skb); //如果是gso报文 if (skb->len > ip_skb_dst_mtu(skb)) //非gso报文,报文大小超过设备MTU值,则需要进行IP分片 return ip_fragment(sk, skb, ip_finish_output2); return ip_finish_output2(sk, skb); //直接发送报文 }3、ip_finish_output_gso函数
static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb) { netdev_features_t features; struct sk_buff *segs; int ret = 0; /* common case: locally created skb or seglen is <= mtu */ if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || //只有ip forward流程该条件才会不成立,否则该条件成立 skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb)) return ip_finish_output2(sk, skb); /* Slowpath - GSO segment length is exceeding the dst MTU. * * This can happen in two cases: * 1) TCP GRO packet, DF bit not set * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly * from host network stack. */ features = netif_skb_features(skb); //获取dev的offload feature segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); //skb gso报文分段 if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); return -ENOMEM; } consume_skb(skb); do { struct sk_buff *nskb = segs->next; int err; segs->next = NULL; err = ip_fragment(sk, segs, ip_finish_output2); //分段报文经过ip分片后通过ip_finish_output2发送 if (err && ret == 0) ret = err; segs = nskb; } while (segs); return ret; }4、ip_finish_output2函数
static inline int ip_finish_output2(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = (struct rtable *)dst; struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; u32 nexthop; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { struct sk_buff *skb2; skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); if (!skb2) { kfree_skb(skb); return -ENOMEM; } if (skb->sk) skb_set_owner_w(skb2, skb->sk); consume_skb(skb); skb = skb2; } rcu_read_lock_bh(); nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (!IS_ERR(neigh)) { int res = dst_neigh_output(dst, neigh, skb); //调用邻居子系统封装MAC头,并且调用二层发包函数完成报文发送 rcu_read_unlock_bh(); return res; } rcu_read_unlock_bh(); net_dbg_ratelimited("%s: No header cache and no neighbour!\n", __func__); kfree_skb(skb); return -EINVAL; }由于邻居子系统是比较庞大的一个系统,后续单独进行分析,另外ip分片和gso分段两个函数也比较复杂,后续将单独进行分析。
相关文章推荐
- 【Linux4.1.12源码分析】二层报文发送之报文GSO分段(IP层)
- 【Linux4.1.12源码分析】IP层报文发送之ip_local_out
- 【Linux4.1.12源码分析】IP层报文发送之ip_local_out
- 【Linux4.1.12源码分析】二层报文发送之报文GSO分段(skb_segment)
- 【Linux4.1.12源码分析】协议栈报文接收之IP层处理分析(ip_forward)
- 【Linux4.1.12源码分析】vxlan报文发送之udp_tunnel_xmit_skb
- 【Linux4.1.12源码分析】二层报文发送之报文GSO分段(MAC层)
- 【Linux4.1.12源码分析】二层报文发送之dev_queue_xmit
- 【Linux4.1.12源码分析】二层报文发送之报文GSO分段(TCP)
- 【Linux4.1.12源码分析】二层报文发送之报文GSO分段(UDP)
- 【Linux4.1.12源码分析】二层报文发送之GSO条件判断
- 【Linux4.1.12源码分析】协议栈报文接收之IP层处理分析(ip_local_deliver)
- 【Linux4.1.12源码分析】vxlan报文发送之iptunnel_xmit
- 【Linux4.1.12源码分析】二层报文发送之qdisc实现分析
- Linux TCP/IP 协议栈源码分析 - 数据 发送/接收 流程图
- 【Linux4.1.12源码分析】邻居子系统实现分析 - neigh_resolve_output() - neigh_probe() - neigh_update()
- 【Linux4.1.12源码分析】协议栈gro收包之IP层处理
- 【Linux4.1.12源码分析】邻居子系统实现分析 - ARP - ip_finish_output2()
- 【Linux4.1.12源码分析】邻居子系统实现分析
- [转载]Linux TCP/IP 协议栈源码分析