您的位置:首页 > 编程语言

Openvswitch原理与代码分析(2): ovs-vswitchd的启动

2016-09-13 23:39 447 查看
ovs-vswitchd.c的main函数最终会进入一个while循环,在这个无限循环中,最重要的两个函数是bridge_run()和netdev_run()。

 



 

Openvswitch主要管理两种类型的设备,一个是创建的虚拟网桥,一个是连接到虚拟网桥上的设备。

 

其中bridge_run就是初始化数据库中已经创建的虚拟网桥。

 


一、虚拟网桥的初始化bridge_run

 

bridge_run会调用bridge_run__,bridge_run__中最重要的是对于所有的网桥,都调用ofproto_run

 

/* Excerpt of bridge_run__(): the declarations and earlier statements are
 * elided ("……").  The part shown walks every bridge in 'all_bridges' and
 * calls ofproto_run() on that bridge's ofproto. */
static void

bridge_run__(void)

{

……

    /* Let each bridge do the work that it needs to do. */

    HMAP_FOR_EACH (br, node, &all_bridges) {

        ofproto_run(br->ofproto);

    }

}

 

int ofproto_run(struct ofproto *p)会调用error = p->ofproto_class->run(p);

 

ofproto_class的定义在ofproto-provider.h中,它的实现定义在ofproto-dpif.c中,这里面的所有的函数,在这个文件中都有定义。

 

/* The ofproto_class instance named ofproto_dpif_class.
 *
 * The initializer is positional (no designators), so each entry lands in
 * the member at the same position in struct ofproto_class, which is
 * declared elsewhere; the order below must therefore match that declaration.
 *
 * Slots that this instance leaves unimplemented are set to NULL and labeled
 * with the slot name in a trailing comment (get_memory_usage,
 * rule_choose_table, and the four meter_* slots).  The 'run' and 'port_add'
 * entries are the callbacks reached through ofproto_class->run() and
 * ofproto_class->port_add(). */
const struct ofproto_class
ofproto_dpif_class = {

    init,

    enumerate_types,

    enumerate_names,

    del,

    port_open_type,

    type_run,

    type_wait,

    alloc,

    construct,

    destruct,

    dealloc,

    run,

    wait,

    NULL, /* get_memory_usage. */

    type_get_memory_usage,

    flush,

    query_tables,

    set_tables_version,

    port_alloc,

    port_construct,

    port_destruct,

    port_dealloc,

    port_modified,

    port_reconfigured,

    port_query_by_name,

    port_add,

    port_del,

    port_get_stats,

    port_dump_start,

    port_dump_next,

    port_dump_done,

    port_poll,

    port_poll_wait,

    port_is_lacp_current,

    port_get_lacp_stats,

    NULL, /* rule_choose_table */

    rule_alloc,

    rule_construct,

    rule_insert,

    rule_delete,

    rule_destruct,

    rule_dealloc,

    rule_get_stats,

    rule_execute,

    set_frag_handling,

    packet_out,

    set_netflow,

    get_netflow_ids,

    set_sflow,

    set_ipfix,

    set_cfm,

    cfm_status_changed,

    get_cfm_status,

    set_lldp,

    get_lldp_status,

    set_aa,

    aa_mapping_set,

    aa_mapping_unset,

    aa_vlan_get_queued,

    aa_vlan_get_queue_size,

    set_bfd,

    bfd_status_changed,

    get_bfd_status,

    set_stp,

    get_stp_status,

    set_stp_port,

    get_stp_port_status,

    get_stp_port_stats,

    set_rstp,

    get_rstp_status,

    set_rstp_port,

    get_rstp_port_status,

    set_queues,

    bundle_set,

    bundle_remove,

    mirror_set__,

    mirror_get_stats__,

    set_flood_vlans,

    is_mirror_output_bundle,

    forward_bpdu_changed,

    set_mac_table_config,

    set_mcast_snooping,

    set_mcast_snooping_port,

    set_realdev,

    NULL, /* meter_get_features */

    NULL, /* meter_set */

    NULL, /* meter_get */

    NULL, /* meter_del */

    group_alloc, /* group_alloc */

    group_construct, /* group_construct */

    group_destruct, /* group_destruct */

    group_dealloc, /* group_dealloc */

    group_modify, /* group_modify */

    group_get_stats, /* group_get_stats */

    get_datapath_version, /* get_datapath_version */

};

 

在ofproto-provider.h中注释里是这样说的。

这里定义了四类数据结构

struct ofproto表示一个交换机

struct ofport表示交换机上的一个端口

struct rule表示交换机上的一条flow规则

struct ofgroup表示一个flow规则组

 

上面说到启动的过程中,会调用ofproto_class->run,也即会调用ofproto-dpif.c中的static int run(struct ofproto *ofproto_)函数。

 

在这个函数中,会依次运行netflow、sflow、ipfix、stp、rstp、mac address learning等一系列功能模块。

 

bridge_run还会调用static void bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg),其中ovs_cfg是从ovsdb-server里面读取出来的配置。

 

在这个函数里面,对于每一个网桥,将网卡添加进去

HMAP_FOR_EACH (br, node, &all_bridges) {

    bridge_add_ports(br, &br->wanted_ports);

    shash_destroy(&br->wanted_ports);

}

 

/* Adds the interfaces listed in 'wanted_ports' to bridge 'br'.
 *
 * The work is delegated to bridge_add_ports__(), which is called twice on
 * the same 'wanted_ports': first with its last argument true, then with it
 * false.  The comments in the body explain why the pass for explicitly
 * requested port numbers has to come first. */
static void

bridge_add_ports(struct bridge *br, const struct shash
*wanted_ports)

{

    /* First add interfaces that request a particular port number. */

    bridge_add_ports__(br, wanted_ports, true);

 

    /* Then add interfaces that want automatic port number assignment.

     * We add these afterward to avoid accidentally taking a specifically

     * requested port number. */

    bridge_add_ports__(br, wanted_ports, false);

}

 

static void bridge_add_ports__(struct bridge *br, const struct shash *wanted_ports, bool with_requested_port)会调用

static bool iface_create(struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg)会调用

static int iface_do_create(const struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg, ofp_port_t *ofp_portp, struct netdev **netdevp, char **errp)会调用

int ofproto_port_add(struct ofproto *ofproto, struct netdev *netdev, ofp_port_t *ofp_portp)会调用

 

error = ofproto->ofproto_class->port_add(ofproto, netdev);

 

会调用ofproto-dpif.c中的ofproto_dpif_class的static int port_add(struct ofproto *ofproto_, struct netdev *netdev)函数。

 

会调用int dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop)会调用

 

error = dpif->dpif_class->port_add(dpif, netdev, &port_no);

 

会调用dpif_netlink_class的port_add函数,也即dpif_netlink_port_add,也即

static int dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,odp_port_t *port_nop)会调用

static int dpif_netlink_port_add__(struct dpif_netlink *dpif, struct netdev *netdev, odp_port_t *port_nop)

 

在这个函数里面,会调用netlink的API,命令为OVS_VPORT_CMD_NEW

 

const char *name = netdev_vport_get_dpif_port(netdev,

                                                  namebuf, sizeof namebuf);

struct dpif_netlink_vport request, reply;

struct nl_sock **socksp = NULL;

 

if (dpif->handlers) {

    socksp = vport_create_socksp(dpif, &error);

    if (!socksp) {

        return error;

    }

}

 

dpif_netlink_vport_init(&request);

request.cmd = OVS_VPORT_CMD_NEW;

request.dp_ifindex = dpif->dp_ifindex;

request.type = netdev_to_ovs_vport_type(netdev);

 

request.name = name;

 

upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);

request.n_upcall_pids = socksp ? dpif->n_handlers : 1;

request.upcall_pids = upcall_pids;

error = dpif_netlink_vport_transact(&request, &reply, &buf);

 

这里会调用内核模块openvswitch.ko,在内核中添加虚拟网卡。这部分详细的过程将在下一节分析。


二、虚拟网卡的初始化netdev_run()

 

/* Gives every registered netdev class a chance to do its periodic work.
 *
 * Calls netdev_initialize() first, then, while holding
 * 'netdev_class_mutex', iterates over 'netdev_classes' and invokes the
 * 'run' callback of each class that provides one (a NULL 'run' is skipped).
 *
 * The OVS_EXCLUDED annotation names 'netdev_class_mutex' and
 * 'netdev_mutex'; presumably it declares that the caller must not already
 * hold them -- confirm against the macro's definition. */
void

netdev_run(void)

    OVS_EXCLUDED(netdev_class_mutex, netdev_mutex)

{

    struct netdev_registered_class *rc;

 

    netdev_initialize();

    ovs_mutex_lock(&netdev_class_mutex);

    HMAP_FOR_EACH (rc, hmap_node, &netdev_classes) {

        if (rc->class->run)
{

            rc->class->run();

        }

    }

    ovs_mutex_unlock(&netdev_class_mutex);

}

 

依次循环调用netdev_classes中的每一个run。

 

对于不同类型的虚拟网卡,都有对应的netdev_class。

 

例如对于dpdk的网卡有

 

/* netdev_class instance for DPDK devices, built by the NETDEV_DPDK_CLASS
 * macro (defined elsewhere).  The first argument is the string "dpdk" --
 * presumably the class's type name -- the second is NULL, and the remaining
 * arguments are the netdev_dpdk_* callbacks that the macro places into the
 * initializer.  Which slot each argument fills depends on the macro's
 * definition, which is not shown here. */
static const struct netdev_class
dpdk_class =

    NETDEV_DPDK_CLASS(

        "dpdk",

        NULL,

        netdev_dpdk_construct,

        netdev_dpdk_destruct,

        netdev_dpdk_set_multiq,

        netdev_dpdk_eth_send,

        netdev_dpdk_get_carrier,

        netdev_dpdk_get_stats,

        netdev_dpdk_get_features,

        netdev_dpdk_get_status,

        netdev_dpdk_rxq_recv);

 

对于物理网卡,也需要有相应的netdev_class

 

/* netdev_class instance built by NETDEV_LINUX_CLASS with NAME "system".
 * Arguments, in macro-parameter order: NAME, CONSTRUCT, GET_STATS,
 * GET_FEATURES, GET_STATUS.  Every slot not covered by these five arguments
 * is filled in by the macro itself. */
const struct netdev_class
netdev_linux_class =

    NETDEV_LINUX_CLASS(

        "system",

        netdev_linux_construct,

        netdev_linux_get_stats,

        netdev_linux_get_features,

        netdev_linux_get_status);

 

对于连接到KVM的tap网卡

/* netdev_class instance built by NETDEV_LINUX_CLASS with NAME "tap".
 * It passes its own CONSTRUCT (netdev_linux_construct_tap) and GET_STATS
 * (netdev_tap_get_stats) callbacks, and uses the netdev_linux_* functions
 * for GET_FEATURES and GET_STATUS. */
const struct netdev_class
netdev_tap_class =

    NETDEV_LINUX_CLASS(

        "tap",

        netdev_linux_construct_tap,

        netdev_tap_get_stats,

        netdev_linux_get_features,

        netdev_linux_get_status);

 

对于类型为internal的虚拟网卡(即Open vSwitch的内部端口;veth pair等内核中已存在的设备使用的是上面的"system"类型)

/* netdev_class instance built by NETDEV_LINUX_CLASS with NAME "internal".
 * It passes netdev_linux_construct as CONSTRUCT, its own GET_STATS and
 * GET_STATUS callbacks (netdev_internal_get_stats and
 * netdev_internal_get_status), and NULL for GET_FEATURES, so this class
 * provides no get_features callback. */
const struct netdev_class
netdev_internal_class =

    NETDEV_LINUX_CLASS(

        "internal",

        netdev_linux_construct,

        netdev_internal_get_stats,

        NULL, /* get_features */

        netdev_internal_get_status);

 

其中NETDEV_LINUX_CLASS是一个宏,使用时只需要填写NAME、CONSTRUCT、GET_STATS、GET_FEATURES、GET_STATUS这五个参数,其余成员由宏统一填入netdev_linux_*函数或NULL。

/* Expands to a brace-enclosed positional initializer for a netdev class.
 *
 * Only five slots vary between instances and are taken from the macro
 * arguments: NAME (first member), CONSTRUCT, GET_STATS, GET_FEATURES and
 * GET_STATUS.  Every other slot is fixed: either a shared netdev_linux_*
 * function (for example netdev_linux_run and netdev_linux_wait near the
 * top) or NULL for the slots labeled in the trailing comments (get_config,
 * set_config, get_tunnel_config, build/push/pop header, get_numa_id,
 * set_multiq).
 *
 * Because the initializer is positional, the entry order must match the
 * member order of the struct being initialized, which is declared
 * elsewhere. */
#define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS, \

                           GET_FEATURES, GET_STATUS) \

{ \

    NAME, \

                                                                \

    NULL, \

    netdev_linux_run, \

    netdev_linux_wait, \

                                                                \

    netdev_linux_alloc, \

    CONSTRUCT, \

    netdev_linux_destruct, \

    netdev_linux_dealloc, \

    NULL, /* get_config */ \

    NULL, /* set_config */ \

    NULL, /* get_tunnel_config */ \

    NULL, /* build header */ \

    NULL, /* push header */ \

    NULL, /* pop header */ \

    NULL, /* get_numa_id */ \

    NULL, /* set_multiq */ \

                                                                \

    netdev_linux_send, \

    netdev_linux_send_wait, \

                                                                \

    netdev_linux_set_etheraddr, \

    netdev_linux_get_etheraddr, \

    netdev_linux_get_mtu, \

    netdev_linux_set_mtu, \

    netdev_linux_get_ifindex, \

    netdev_linux_get_carrier, \

    netdev_linux_get_carrier_resets, \

    netdev_linux_set_miimon_interval, \

    GET_STATS, \

                                                                \

    GET_FEATURES, \

    netdev_linux_set_advertisements, \

                                                                \

    netdev_linux_set_policing, \

    netdev_linux_get_qos_types, \

    netdev_linux_get_qos_capabilities, \

    netdev_linux_get_qos, \

    netdev_linux_set_qos, \

    netdev_linux_get_queue, \

    netdev_linux_set_queue, \

    netdev_linux_delete_queue, \

    netdev_linux_get_queue_stats, \

    netdev_linux_queue_dump_start, \

    netdev_linux_queue_dump_next, \

    netdev_linux_queue_dump_done, \

    netdev_linux_dump_queue_stats, \

                                                                \

    netdev_linux_get_in4, \

    netdev_linux_set_in4, \

    netdev_linux_get_in6, \

    netdev_linux_add_router, \

    netdev_linux_get_next_hop, \

    GET_STATUS, \

    netdev_linux_arp_lookup, \

                                                                \

    netdev_linux_update_flags, \

                                                                \

    netdev_linux_rxq_alloc, \

    netdev_linux_rxq_construct, \

    netdev_linux_rxq_destruct, \

    netdev_linux_rxq_dealloc, \

    netdev_linux_rxq_recv, \

    netdev_linux_rxq_wait, \

    netdev_linux_rxq_drain, \

}

 

rc->class->run()调用的是netdev-linux.c下的netdev_linux_run

 

netdev_linux_run会调用netlink的sock得到虚拟网卡的状态,并且更新状态。

 

error = nl_sock_recv(sock, &buf, false);

if (!error) {

    struct rtnetlink_change change;

    if (rtnetlink_parse(&buf, &change)) {

        struct netdev *netdev_ = netdev_from_name(change.ifname);

        if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {

           struct netdev_linux *netdev = netdev_linux_cast(netdev_);

           ovs_mutex_lock(&netdev->mutex);

           netdev_linux_update(netdev, &change);

           ovs_mutex_unlock(&netdev->mutex);

        }

        netdev_close(netdev_);

     }

}

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: