您的位置:首页 > 理论基础 > 计算机网络

Linux网络协议栈之网络设备管理

2010-07-09 16:22 302 查看
Linux为何要对网络设备单独管理呢?这是因为协议栈很多地方都会涉及到网络设备:小至IP地址的设置,大至IP路由的更新,都离不开高效的网络设备管理。将网络设备单独管理可以提高效率!    每个网络设备,在Linux中都会对应一个数据结构net_device。就从这个结构说起。Linux 2.6.21中,对net_device定义如下:
/*
 * struct net_device - descriptor kept for every network device.
 *
 * Registered devices are chained through 'next' and are additionally
 * hashed by name (name_hlist) and by interface index (index_hlist).
 */
struct net_device
{
   /* Device name, e.g. the familiar "eth0". */
   char     name[IFNAMSIZ];
   /* End and start addresses of the shared memory region. */
   unsigned long   mem_end; /* shared mem end */
   unsigned long   mem_start;  /* shared mem start  */
   /* I/O base address of the device. */
   unsigned long   base_addr;  /* device I/O address */
   /* Interrupt number assigned to the device. */
   unsigned int    irq;   /* device IRQ number  */
   /* Which port to use on a multi-port device. */
   unsigned char   if_port; /* Selectable AUI, TP,..*/
   /* DMA channel assigned to the device. */
   unsigned char   dma;   /* DMA channel     */
   /* Device state bits. */
   unsigned long   state;
   /* Next net_device in the device list. */
   struct net_device *next;
   /* Initialisation hook. */
   int      (*init)(struct net_device *dev);
   struct net_device *next_sched;
   /* Interface index. Unique device identifier  */
   /* Index number of the device inside the kernel. */
   int      ifindex;
   int      iflink;
   /* Function pointer used to fetch the interface statistics. */
   struct net_device_stats* (*get_stats)(struct net_device *dev);
   struct iw_statistics* (*get_wireless_stats)(struct net_device *dev);
   struct iw_handler_def * wireless_handlers;
   struct ethtool_ops *ethtool_ops;
   /* Transmit timestamp; the article notes it is used to check
    * whether transmission has become stuck.
    */
   unsigned long   trans_start; /* Time (in jiffies) of last Tx */
   /* Time the device was last used (last receive). */
   unsigned long   last_rx; /* Time of last Rx */
   /* Interface flags. */
   unsigned short     flags;  /* interface flags (ala BSD)  */
   unsigned short     gflags;
   unsigned short     priv_flags; /* Like 'flags' but invisible to userspace. */
   unsigned short     unused_alignment_fixer; /* Because we need priv_flags,
                              * and we want to be 32-bit aligned.
                              */
   unsigned   mtu; /* interface MTU value   */
   unsigned short     type;  /* interface hardware type */
   unsigned short     hard_header_len;  /* hardware hdr length */
   void     *priv;  /* pointer to private data */
   struct net_device *master; /* Pointer to master device of a group,
             * which this device is member of.
             */
   /* Interface address info. */
   unsigned char   broadcast[MAX_ADDR_LEN];  /* hw bcast add  */
   unsigned char   dev_addr[MAX_ADDR_LEN]; /* hw address */
   unsigned char   addr_len; /* hardware address length */
   struct dev_mc_list *mc_list; /* Multicast mac addresses */
   int      mc_count; /* Number of installed mcasts  */
   int      promiscuity;
   int      allmulti;
   int      watchdog_timeo;
   struct timer_list watchdog_timer;
   /* Protocol specific pointers */
   void        *atalk_ptr;  /* AppleTalk link */
   void     *ip_ptr; /* IPv4 specific data */
   void          *dn_ptr;    /* DECnet specific data */
   void          *ip6_ptr;    /* IPv6 specific data */
   void     *ec_ptr; /* Econet specific data */
   void     *ax25_ptr;  /* AX.25 specific data */
   struct list_head  poll_list;  /* Link to poll list  */
   int      quota;
   int      weight;
   struct Qdisc    *qdisc;
   struct Qdisc    *qdisc_sleeping;
   struct Qdisc    *qdisc_ingress;
   struct list_head  qdisc_list;
   unsigned long   tx_queue_len; /* Max frames per queue allowed */
   /* ingress path synchronizer */
   spinlock_t     ingress_lock;
   /* hard_start_xmit synchronizer */
   spinlock_t     xmit_lock;
   /* cpu id of processor entered to hard_start_xmit or -1,
     if nobody entered there.
   */
   int      xmit_lock_owner;
   /* device queue lock */
   spinlock_t     queue_lock;
   /* Number of references to this device */
   atomic_t   refcnt;
   /* delayed register/unregister */
   struct list_head  todo_list;
   /* device name hash chain */
   struct hlist_node name_hlist;
   /* device index hash chain */
   struct hlist_node index_hlist;
   /* register/unregister state machine */
   enum { NETREG_UNINITIALIZED=0,
       NETREG_REGISTERING, /* called register_netdevice */
       NETREG_REGISTERED,  /* completed register todo */
       NETREG_UNREGISTERING,   /* called unregister_netdevice */
       NETREG_UNREGISTERED, /* completed unregister todo */
       NETREG_RELEASED,   /* called free_netdev */
   } reg_state;
   /* Net device features */
   int      features;
#define NETIF_F_SG   1  /* Scatter/gather IO. */
#define NETIF_F_IP_CSUM   2  /* Can checksum only TCP/UDP over IPv4. */
#define NETIF_F_NO_CSUM   4  /* Does not require checksum. F.e. loopack. */
#define NETIF_F_HW_CSUM   8  /* Can checksum all the packets. */
#define NETIF_F_HIGHDMA   32  /* Can DMA to high memory. */
#define NETIF_F_FRAGLIST  64  /* Scatter/gather IO. */
#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
#define NETIF_F_HW_VLAN_FILTER  512 /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_TSO     2048 /* Can offload TCP/IP segmentation */
#define NETIF_F_LLTX    4096 /* LockLess TX */
   /* Called after device is detached from network. */
   void     (*uninit)(struct net_device *dev);
   /* Called after last user reference disappears. */
   void     (*destructor)(struct net_device *dev);
   /* Pointers to interface service routines.   */
   /* Open routine. */
   int      (*open)(struct net_device *dev);
   /* Called when the device is shut down. */
   int      (*stop)(struct net_device *dev);
   /* Starts the transmission of a packet. */
   int      (*hard_start_xmit) (struct sk_buff *skb,
                struct net_device *dev);
#define HAVE_NETDEV_POLL
   /* Polling routine. */
   int      (*poll) (struct net_device *dev, int *quota);
   /* Builds the hardware header. */
   int      (*hard_header) (struct sk_buff *skb,
              struct net_device *dev,
              unsigned short type,
              void *daddr,
              void *saddr,
              unsigned len);
   /* Rebuilds the header after ARP resolution. */
   int      (*rebuild_header)(struct sk_buff *skb);
#define HAVE_MULTICAST
   /* Multicast support routine. */
   void     (*set_multicast_list)(struct net_device *dev);
#define HAVE_SET_MAC_ADDR
   int      (*set_mac_address)(struct net_device *dev,
                void *addr);
#define HAVE_PRIVATE_IOCTL
   int      (*do_ioctl)(struct net_device *dev,
              struct ifreq *ifr, int cmd);
#define HAVE_SET_CONFIG
   int      (*set_config)(struct net_device *dev,
               struct ifmap *map);
#define HAVE_HEADER_CACHE
   int      (*hard_header_cache)(struct neighbour *neigh,
                 struct hh_cache *hh);
   void     (*header_cache_update)(struct hh_cache *hh,
                  struct net_device *dev,
                  unsigned char * haddr);
#define HAVE_CHANGE_MTU
   int      (*change_mtu)(struct net_device *dev, int new_mtu);
#define HAVE_TX_TIMEOUT
   void     (*tx_timeout) (struct net_device *dev);
   void     (*vlan_rx_register)(struct net_device *dev,
                struct vlan_group *grp);
   void     (*vlan_rx_add_vid)(struct net_device *dev,
                unsigned short vid);
   void     (*vlan_rx_kill_vid)(struct net_device *dev,
                unsigned short vid);
   int      (*hard_header_parse)(struct sk_buff *skb,
                 unsigned char *haddr);
   int      (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
   int      (*accept_fastpath)(struct net_device *, struct dst_entry*);
#ifdef CONFIG_NETPOLL
   int      netpoll_rx;
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
   void          (*poll_controller)(struct net_device *dev);
#endif
   /* bridge stuff */
   /* Bridge port this device is attached to (analysed later in the article). */
   struct net_bridge_port *br_port;
#ifdef CONFIG_NET_DIVERT
   /* this will get initialized at each interface type init routine */
   struct divert_blk *divert;
#endif /* CONFIG_NET_DIVERT */
   /* class/net/name entry */
   struct class_device  class_dev;
   /* how much padding had been added by alloc_netdev() */
   int padded;
};


  成员太多,结构太庞大了。不要紧,等到要使用相应成员的时候再来解释好了。

注意到这么庞大的结构中,有个成员叫struct net_device *next,呵呵,很熟悉吧,就是用它来建立网络设备的链表。每一个网络设备启动的时候,都会调用register_netdev()(drivers/net/net_init.c)。

跟踪这个函数:

/*
 * register_netdev - register a network device, taking the RTNL lock itself.
 * @dev: device to register
 *
 * Resolves the device name first (a '%' format in the name, or an empty /
 * blank name, triggers dev_alloc_name()), then hands the device over to
 * register_netdevice().  Returns the negative value from dev_alloc_name()
 * if name allocation fails, otherwise whatever register_netdevice() returns.
 */
int register_netdev(struct net_device *dev)
{
   int err = 0;

   rtnl_lock();

   /* A format string in the name means the caller wants a name allocated. */
   if (strchr(dev->name, '%'))
     err = dev_alloc_name(dev, dev->name);

   /* Back compatibility hook (to be killed in 2.5): unnamed => "eth%d". */
   if (err >= 0 && (dev->name[0] == 0 || dev->name[0] == ' '))
     err = dev_alloc_name(dev, "eth%d");

   /* Only register once every name allocation above has succeeded. */
   if (err >= 0)
     err = register_netdevice(dev);

   rtnl_unlock();
   return err;
}


跟踪至: register_netdevice(struct net_device *dev) (net/core/dev.c)

/*
 * register_netdevice - register a network device with the kernel.
 * @dev: device to register
 *
 * Must be called with the RTNL lock held (see ASSERT_RTNL() below) and only
 * on a device whose reg_state is still NETREG_UNINITIALIZED.
 *
 * On success the device is appended to the dev_base list, added to the
 * name hash and the index hash, NETDEV_REGISTER is sent on netdev_chain,
 * and 0 is returned.
 *
 * On failure a non-zero error is returned:
 *   - the value returned by alloc_divert_blk(), if it fails;
 *   - the value returned by dev->init(), or -EIO if that value is positive;
 *   - -EINVAL if the device name is not valid;
 *   - -EEXIST if a device with the same name is already registered.
 */
int register_netdevice(struct net_device *dev)
{
   struct hlist_head *head;
   struct hlist_node *p;
   int ret;
   BUG_ON(dev_boot_phase);
   ASSERT_RTNL();
   /* When net_device's are persistent, this will be fatal. */
   BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
   spin_lock_init(&dev->queue_lock);
   spin_lock_init(&dev->xmit_lock);
   dev->xmit_lock_owner = -1;
#ifdef CONFIG_NET_CLS_ACT
   spin_lock_init(&dev->ingress_lock);
#endif
   ret = alloc_divert_blk(dev);
   if (ret)
     goto out;
   dev->iflink = -1;
   /* Init, if this function is available */
   /* If dev->init has been set, call it. */
   if (dev->init) {
     ret = dev->init(dev);
     if (ret) {
       /* Normalise a positive failure value to a negative errno. */
       if (ret > 0)
          ret = -EIO;
       goto out_err;
     }
   }
   /* Check that the name is legal. */
   if (!dev_valid_name(dev->name)) {
     ret = -EINVAL;
     goto out_err;
   }
   /* Allocate an index for this device. */
   dev->ifindex = dev_new_index();
   /* iflink still -1 means init() did not set it: default to own index. */
   if (dev->iflink == -1)
     dev->iflink = dev->ifindex;
   /* Check for existence of name */
   /*
    * All network devices are stored in dev_name_head, a hash array keyed
    * by device name.  Find the chain for this name; if the kernel already
    * has a device with this name, bail out with an error.
    */
   head = dev_name_hash(dev->name);
   hlist_for_each(p, head) {
     struct net_device *d
       = hlist_entry(p, struct net_device, name_hlist);
     if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
       ret = -EEXIST;
       goto out_err;
     }
   }
   /* Fix illegal SG+CSUM combinations. */
   if ((dev->features & NETIF_F_SG) &&
     !(dev->features & (NETIF_F_IP_CSUM |
           NETIF_F_NO_CSUM |
           NETIF_F_HW_CSUM))) {
     /* The message must end in a real newline escape, not a literal "n". */
     printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
         dev->name);
     dev->features &= ~NETIF_F_SG;
   }
   /*
   *  nil rebuild_header routine,
   *  that should be never called and used as just bug trap.
   */
   /* Give rebuild_header a default value. */
   if (!dev->rebuild_header)
     dev->rebuild_header = default_rebuild_header;
   /*
   *  Default initial state at registry is that the
   *  device is present.
   */
   set_bit(__LINK_STATE_PRESENT, &dev->state);
   dev->next = NULL;
   dev_init_scheduler(dev);
   write_lock_bh(&dev_base_lock);
   /*
    * Per the article, dev_tail is initialised as
    *   struct net_device **dev_tail = &dev_base;
    * so these two statements append dev to the tail of the list whose
    * head is dev_base.
    */
   *dev_tail = dev;
   dev_tail = &dev->next;
   /* Add the device to dev_name_head, the hash array keyed by name. */
   hlist_add_head(&dev->name_hlist, head);
   /* Add the device to dev_index_head, the hash array keyed by index. */
   hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
   dev_hold(dev);
   dev->reg_state = NETREG_REGISTERING;
   write_unlock_bh(&dev_base_lock);
   /* Notify protocols, that a new device appeared. */
   /* Send the event on the notifier chain. */
   notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
   /* Finish registration after unlock */
   net_set_todo(dev);
   ret = 0;
out:
   return ret;
out_err:
   /* Undo alloc_divert_blk() before reporting the failure. */
   free_divert_blk(dev);
   goto out;
}


从此可以看出,新加入一个设备时,会插入三个位置:以名字为哈希键组织的哈希数组dev_name_head,以序号为哈希键的哈希数组dev_index_head,还有链表dev_base。它们为快速查找网络设备提供了基础。事实上,在内核中经常要根据index找到dev,或者根据name找到dev,我们遇到的时候再分析。到现在,我们可以在内核中顺藤摸瓜地找到每一个网络设备了。还有很重要的一点:设备更改了配置,要怎么通知跟它相关的子系统呢?例如,网卡更新了IP,如何使路由得到更新?接着往下看:注意到上面注册代码中所调用的一个函数notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev)。该函数的作用是,在通知链表netdev_chain上发送NETDEV_REGISTER消息,所有与该通知链表关联的子系统都可以收到此消息。以此,可以快速地更新整个系统的配置信息。以路由子系统为例,来讲述该过程:在IPv4子系统加载的时候,会调用ip_init(),接着调用fib_init(),然后再调用ip_fib_init()。跟踪一下此函数:
/*
 * ip_fib_init - boot-time setup of the IPv4 FIB.
 *
 * With CONFIG_IP_MULTIPLE_TABLES the rule subsystem is initialised;
 * without it, the local and main hash tables are created directly.
 * In both cases the FIB then subscribes to network-device events and
 * to IPv4 address events.
 */
void __init ip_fib_init(void)
{
#ifdef CONFIG_IP_MULTIPLE_TABLES
   fib_rules_init();
#else
   ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
   ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
#endif

   /* Hook the FIB into the device and address notifier chains. */
   register_netdevice_notifier(&fib_netdev_notifier);
   register_inetaddr_notifier(&fib_inetaddr_notifier);
}


register_netdevice_notifier是做什么的呢?

往下跟踪:

/*
 * register_netdevice_notifier - subscribe to network device events.
 * @nb: notifier block to add to netdev_chain
 *
 * Registers @nb on netdev_chain under the RTNL lock.  If registration
 * succeeds, @nb is called directly for every device currently on the
 * dev_base list: once with NETDEV_REGISTER, and once more with NETDEV_UP
 * when the device has IFF_UP set.
 *
 * Returns the result of notifier_chain_register().
 */
int register_netdevice_notifier(struct notifier_block *nb)
{
   struct net_device *cur;
   int rc;

   rtnl_lock();

   /* Register on the notifier chain. */
   rc = notifier_chain_register(&netdev_chain, nb);
   if (rc)
     goto unlock;

   /* Replay the existing devices to the new listener. */
   cur = dev_base;
   while (cur) {
     nb->notifier_call(nb, NETDEV_REGISTER, cur);
     if (cur->flags & IFF_UP)
       nb->notifier_call(nb, NETDEV_UP, cur);
     cur = cur->next;
   }

unlock:
   rtnl_unlock();
   return rc;
}


 
呵呵,它在netdev_chain上注册了通知链,当此链上有事件发生时,会调用fib_netdev_notifier中的相关函数来处理。看一下fib_netdev_notifier的信息:

/* Notifier block whose callback for device events is fib_netdev_event(). */
struct notifier_block fib_netdev_notifier = { .notifier_call = fib_netdev_event };


OK,现在越来越具体了,如果netdev_chain有事件,会调用fib_netdev_event处理。

继续跟踪:

/*
 * fib_netdev_event - notifier callback for network device events.
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * NETDEV_UNREGISTER is handled even when the device has no in_device;
 * every other event is ignored in that case.  Always returns NOTIFY_DONE.
 */
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
   struct net_device *netdev = ptr;
   struct in_device *in_dev = __in_dev_get(netdev);

   /* Device is being unregistered. */
   if (event == NETDEV_UNREGISTER) {
     fib_disable_ip(netdev, 2);
     return NOTIFY_DONE;
   }

   /* Nothing else to do for a device without IPv4 state. */
   if (!in_dev)
     return NOTIFY_DONE;

   if (event == NETDEV_UP) {
     /* Device came up: add every interface address, then flush the cache. */
     for_ifa(in_dev) {
       fib_add_ifaddr(ifa);
     } endfor_ifa(in_dev);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
     fib_sync_up(netdev);
#endif
     rt_cache_flush(-1);
   } else if (event == NETDEV_DOWN) {
     /* Device went down. */
     fib_disable_ip(netdev, 0);
   } else if (event == NETDEV_CHANGEMTU || event == NETDEV_CHANGE) {
     /* Device parameters changed. */
     rt_cache_flush(0);
   }

   return NOTIFY_DONE;
}


  路由部分的代码将在后续的笔记中给出。至此,整个网络设备的架构已经非常清晰了!
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: