Openvswitch原理与代码分析(2): ovs-vswitchd的启动

xiaoxiao2021-02-28  100

ovs-vswitchd.c的main函数最终会进入一个while循环,在这个无限循环中,里面最重要的两个函数是bridge_run()和netdev_run()。

 

 

Openvswitch主要管理两种类型的设备,一个是创建的虚拟网桥,一个是连接到虚拟网桥上的设备。

 

其中bridge_run就是初始化数据库中已经创建的虚拟网桥。

 

一、虚拟网桥的初始化bridge_run

 

bridge_run会调用bridge_run__,bridge_run__中最重要的是对于所有的网桥,都调用ofproto_run

 

static void bridge_run__(void)
{
    ……
    /* Let each bridge do the work that it needs to do. */
    HMAP_FOR_EACH (br, node, &all_bridges) {
        ofproto_run(br->ofproto);
    }
}

 

int ofproto_run(struct ofproto *p)会调用error = p->ofproto_class->run(p);

 

ofproto_class的定义在ofproto-provider.h中,它的实现定义在ofproto-dpif.c中,这里面的所有的函数,在这个文件中都有定义。

 

const struct ofproto_class ofproto_dpif_class = {
    init,
    enumerate_types,
    enumerate_names,
    del,
    port_open_type,
    type_run,
    type_wait,
    alloc,
    construct,
    destruct,
    dealloc,
    run,
    wait,
    NULL, /* get_memory_usage. */
    type_get_memory_usage,
    flush,
    query_tables,
    set_tables_version,
    port_alloc,
    port_construct,
    port_destruct,
    port_dealloc,
    port_modified,
    port_reconfigured,
    port_query_by_name,
    port_add,
    port_del,
    port_get_stats,
    port_dump_start,
    port_dump_next,
    port_dump_done,
    port_poll,
    port_poll_wait,
    port_is_lacp_current,
    port_get_lacp_stats,
    NULL, /* rule_choose_table */
    rule_alloc,
    rule_construct,
    rule_insert,
    rule_delete,
    rule_destruct,
    rule_dealloc,
    rule_get_stats,
    rule_execute,
    set_frag_handling,
    packet_out,
    set_netflow,
    get_netflow_ids,
    set_sflow,
    set_ipfix,
    set_cfm,
    cfm_status_changed,
    get_cfm_status,
    set_lldp,
    get_lldp_status,
    set_aa,
    aa_mapping_set,
    aa_mapping_unset,
    aa_vlan_get_queued,
    aa_vlan_get_queue_size,
    set_bfd,
    bfd_status_changed,
    get_bfd_status,
    set_stp,
    get_stp_status,
    set_stp_port,
    get_stp_port_status,
    get_stp_port_stats,
    set_rstp,
    get_rstp_status,
    set_rstp_port,
    get_rstp_port_status,
    set_queues,
    bundle_set,
    bundle_remove,
    mirror_set__,
    mirror_get_stats__,
    set_flood_vlans,
    is_mirror_output_bundle,
    forward_bpdu_changed,
    set_mac_table_config,
    set_mcast_snooping,
    set_mcast_snooping_port,
    set_realdev,
    NULL, /* meter_get_features */
    NULL, /* meter_set */
    NULL, /* meter_get */
    NULL, /* meter_del */
    group_alloc, /* group_alloc */
    group_construct, /* group_construct */
    group_destruct, /* group_destruct */
    group_dealloc, /* group_dealloc */
    group_modify, /* group_modify */
    group_get_stats, /* group_get_stats */
    get_datapath_version, /* get_datapath_version */
};

 

在ofproto-provider.h中注释里是这样说的。

这里定义了四类数据结构

struct ofproto表示一个交换机

struct ofport表示交换机上的一个端口

struct rule表示交换机上的一条flow规则

struct ofgroup表示一个flow规则组

 

上面说到启动的过程中,会调用ofproto_class->run,也即会调用ofproto-dpif.c中的static int run(struct ofproto *ofproto_)函数。

 

在这个函数中,会初始化netflow, sflow, ipfix,stp, rstp, mac address learning等一系列操作。

 

bridge_run还会调用static void bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg),其中ovs_cfg是从ovsdb-server里面读取出来的配置。

 

在这个函数里面,对于每一个网桥,将网卡添加进去

HMAP_FOR_EACH (br, node, &all_bridges) {
    bridge_add_ports(br, &br->wanted_ports);
    shash_destroy(&br->wanted_ports);
}

 

static void bridge_add_ports(struct bridge *br, const struct shash *wanted_ports)
{
    /* First add interfaces that request a particular port number. */
    bridge_add_ports__(br, wanted_ports, true);

    /* Then add interfaces that want automatic port number assignment.
     * We add these afterward to avoid accidentally taking a specifically
     * requested port number. */
    bridge_add_ports__(br, wanted_ports, false);
}

 

static void bridge_add_ports__(struct bridge *br, const struct shash *wanted_ports, bool with_requested_port)会调用

static bool iface_create(struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg)会调用

static int iface_do_create(const struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg, ofp_port_t *ofp_portp, struct netdev **netdevp, char **errp)会调用

int ofproto_port_add(struct ofproto *ofproto, struct netdev *netdev, ofp_port_t *ofp_portp)会调用

 

error = ofproto->ofproto_class->port_add(ofproto, netdev);

 

会调用ofproto-dpif.c中的ofproto_dpif_class的static int port_add(struct ofproto *ofproto_, struct netdev *netdev)函数。

 

会调用int dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop)会调用

 

error = dpif->dpif_class->port_add(dpif, netdev, &port_no);

 

会调用dpif_netlink_class的port_add函数,也即dpif_netlink_port_add,也即

static int dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,odp_port_t *port_nop)会调用

static int dpif_netlink_port_add__(struct dpif_netlink *dpif, struct netdev *netdev, odp_port_t *port_nop)

 

在这个函数里面,会调用netlink的API,命令为OVS_VPORT_CMD_NEW

 

const char *name = netdev_vport_get_dpif_port(netdev,
                                              namebuf, sizeof namebuf);
struct dpif_netlink_vport request, reply;
struct nl_sock **socksp = NULL;

if (dpif->handlers) {
    socksp = vport_create_socksp(dpif, &error);
    if (!socksp) {
        return error;
    }
}

dpif_netlink_vport_init(&request);
request.cmd = OVS_VPORT_CMD_NEW;
request.dp_ifindex = dpif->dp_ifindex;
request.type = netdev_to_ovs_vport_type(netdev);

request.name = name;

upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
request.upcall_pids = upcall_pids;
error = dpif_netlink_vport_transact(&request, &reply, &buf);

 

这里会调用内核模块openvswitch.ko,在内核中添加虚拟网卡。这部分详细的过程将在下一节分析。

二、虚拟网卡的初始化netdev_run()

 

void netdev_run(void)
    OVS_EXCLUDED(netdev_class_mutex, netdev_mutex)
{
    struct netdev_registered_class *rc;

    netdev_initialize();
    ovs_mutex_lock(&netdev_class_mutex);
    HMAP_FOR_EACH (rc, hmap_node, &netdev_classes) {
        if (rc->class->run) {
            rc->class->run();
        }
    }
    ovs_mutex_unlock(&netdev_class_mutex);
}

 

依次循环调用netdev_classes中的每一个run。

 

对于不同类型的虚拟网卡,都有对应的netdev_class。

 

例如对于dpdk的网卡有

 

static const struct netdev_class dpdk_class =
    NETDEV_DPDK_CLASS(
        "dpdk",
        NULL,
        netdev_dpdk_construct,
        netdev_dpdk_destruct,
        netdev_dpdk_set_multiq,
        netdev_dpdk_eth_send,
        netdev_dpdk_get_carrier,
        netdev_dpdk_get_stats,
        netdev_dpdk_get_features,
        netdev_dpdk_get_status,
        netdev_dpdk_rxq_recv);

 

对于物理网卡,也需要有相应的netdev_class

 

const struct netdev_class netdev_linux_class =
    NETDEV_LINUX_CLASS(
        "system",
        netdev_linux_construct,
        netdev_linux_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);

 

对于连接到KVM的tap网卡

const struct netdev_class netdev_tap_class =
    NETDEV_LINUX_CLASS(
        "tap",
        netdev_linux_construct_tap,
        netdev_tap_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);

 

对于虚拟的软网卡,即OVS的internal类型端口(例如与网桥同名的内部端口;注意veth pair在OVS中属于上面的system类型)

const struct netdev_class netdev_internal_class =
    NETDEV_LINUX_CLASS(
        "internal",
        netdev_linux_construct,
        netdev_internal_get_stats,
        NULL, /* get_features */
        netdev_internal_get_status);

 

其中NETDEV_LINUX_CLASS是一个宏,只需要填写五个参数(NAME、CONSTRUCT、GET_STATS、GET_FEATURES、GET_STATUS),其余成员由宏统一填入netdev_linux_*的公共实现或NULL。

#define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS,          \
                           GET_FEATURES, GET_STATUS)            \
{                                                               \
    NAME,                                                       \
                                                                \
    NULL,                                                       \
    netdev_linux_run,                                           \
    netdev_linux_wait,                                          \
                                                                \
    netdev_linux_alloc,                                         \
    CONSTRUCT,                                                  \
    netdev_linux_destruct,                                      \
    netdev_linux_dealloc,                                       \
    NULL, /* get_config */                                      \
    NULL, /* set_config */                                      \
    NULL, /* get_tunnel_config */                               \
    NULL, /* build header */                                    \
    NULL, /* push header */                                     \
    NULL, /* pop header */                                      \
    NULL, /* get_numa_id */                                     \
    NULL, /* set_multiq */                                      \
                                                                \
    netdev_linux_send,                                          \
    netdev_linux_send_wait,                                     \
                                                                \
    netdev_linux_set_etheraddr,                                 \
    netdev_linux_get_etheraddr,                                 \
    netdev_linux_get_mtu,                                       \
    netdev_linux_set_mtu,                                       \
    netdev_linux_get_ifindex,                                   \
    netdev_linux_get_carrier,                                   \
    netdev_linux_get_carrier_resets,                            \
    netdev_linux_set_miimon_interval,                           \
    GET_STATS,                                                  \
                                                                \
    GET_FEATURES,                                               \
    netdev_linux_set_advertisements,                            \
                                                                \
    netdev_linux_set_policing,                                  \
    netdev_linux_get_qos_types,                                 \
    netdev_linux_get_qos_capabilities,                          \
    netdev_linux_get_qos,                                       \
    netdev_linux_set_qos,                                       \
    netdev_linux_get_queue,                                     \
    netdev_linux_set_queue,                                     \
    netdev_linux_delete_queue,                                  \
    netdev_linux_get_queue_stats,                               \
    netdev_linux_queue_dump_start,                              \
    netdev_linux_queue_dump_next,                               \
    netdev_linux_queue_dump_done,                               \
    netdev_linux_dump_queue_stats,                              \
                                                                \
    netdev_linux_get_in4,                                       \
    netdev_linux_set_in4,                                       \
    netdev_linux_get_in6,                                       \
    netdev_linux_add_router,                                    \
    netdev_linux_get_next_hop,                                  \
    GET_STATUS,                                                 \
    netdev_linux_arp_lookup,                                    \
                                                                \
    netdev_linux_update_flags,                                  \
                                                                \
    netdev_linux_rxq_alloc,                                     \
    netdev_linux_rxq_construct,                                 \
    netdev_linux_rxq_destruct,                                  \
    netdev_linux_rxq_dealloc,                                   \
    netdev_linux_rxq_recv,                                      \
    netdev_linux_rxq_wait,                                      \
    netdev_linux_rxq_drain,                                     \
}

 

rc->class->run()调用的是netdev-linux.c下的netdev_linux_run

 

netdev_linux_run会调用netlink的sock得到虚拟网卡的状态,并且更新状态。

 

error = nl_sock_recv(sock, &buf, false);
if (!error) {
    struct rtnetlink_change change;
    if (rtnetlink_parse(&buf, &change)) {
        struct netdev *netdev_ = netdev_from_name(change.ifname);
        if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {
            struct netdev_linux *netdev = netdev_linux_cast(netdev_);
            ovs_mutex_lock(&netdev->mutex);
            netdev_linux_update(netdev, &change);
            ovs_mutex_unlock(&netdev->mutex);
        }
        netdev_close(netdev_);
    }
}
转载请注明原文地址: https://www.6miu.com/read-32507.html

最新回复(0)