当内核datapath无法查找到匹配的流表项时,会通过upcall把数据包上送到用户态的ovs-vswitchd,在用户态的flow table中进行查找。
会调用ofproto-dpif-upcall.c中的udpif_upcall_handler函数。
/* Main loop of one upcall handler.
 *
 * 'arg' is the 'struct handler' this loop services.  The signature matches a
 * thread start routine (presumably started once per handler thread; the
 * creation site is not shown here).
 *
 * Runs until the owning udpif's 'exit_latch' is set.  Always returns NULL. */
static void *
udpif_upcall_handler(void *arg)
{
    struct handler *handler = arg;
    struct udpif *udpif = handler->udpif;

    while (!latch_is_set(&udpif->exit_latch)) {
        if (recv_upcalls(handler)) {
            /* recv_upcalls() reported a nonzero count: ask poll_block()
             * below to return immediately so that we loop again without
             * waiting. */
            poll_immediate_wake();
        } else {
            /* Nothing was received: register to be woken either by new
             * upcalls for this handler or by the exit latch. */
            dpif_recv_wait(udpif->dpif, handler->handler_id);
            latch_wait(&udpif->exit_latch);
        }
        poll_block();
    }

    return NULL;
}
该函数会调用static size_t recv_upcalls(struct handler *handler)。
在recv_upcalls这个函数里面,依次完成以下几步:
(1) 首先读取upcall调用static int upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, const struct dp_packet *packet, enum dpif_upcall_type type, const struct nlattr *userdata, const struct flow *flow, const unsigned int mru, const ovs_u128 *ufid, const unsigned pmd_id)
(2) 其次提取包头调用void flow_extract(struct dp_packet *packet, struct flow *flow),提取出的flow如下:
struct flow { /* Metadata */ struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */ ovs_be64 metadata; /* OpenFlow Metadata. */ uint32_t regs[FLOW_N_REGS]; /* Registers. */ uint32_t skb_priority; /* Packet priority for QoS. */ uint32_t pkt_mark; /* Packet mark. */ uint32_t dp_hash; /* Datapath computed hash value. The exact * computation is opaque to the user space. */ union flow_in_port in_port; /* Input port.*/ uint32_t recirc_id; /* Must be exact match. */ uint16_t ct_state; /* Connection tracking state. */ uint16_t ct_zone; /* Connection tracking zone. */ uint32_t ct_mark; /* Connection mark.*/ uint8_t pad1[4]; /* Pad to 64 bits. */ ovs_u128 ct_label; /* Connection label. */ uint32_t conj_id; /* Conjunction ID. */ ofp_port_t actset_output; /* Output port in action set. */ uint8_t pad2[2]; /* Pad to 64 bits. */ /* L2, Order the same as in the Ethernet header! (64-bit aligned) */ struct eth_addr dl_dst; /* Ethernet destination address. */ struct eth_addr dl_src; /* Ethernet source address. */ ovs_be16 dl_type; /* Ethernet frame type. */ ovs_be16 vlan_tci; /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */ ovs_be32 mpls_lse[ROUND_UP(FLOW_MAX_MPLS_LABELS, 2)]; /* MPLS label stack (with padding). */ /* L3 (64-bit aligned) */ ovs_be32 nw_src; /* IPv4 source address. */ ovs_be32 nw_dst; /* IPv4 destination address. */ structin6_addr ipv6_src; /* IPv6 source address. */ structin6_addr ipv6_dst; /* IPv6 destination address. */ ovs_be32 ipv6_label; /* IPv6 flow label. */ uint8_t nw_frag; /* FLOW_FRAG_* flags. */ uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */ uint8_t nw_ttl; /* IP TTL/Hop Limit. */ uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */ structin6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */ struct eth_addr arp_sha; /* ARP/ND source hardware address. */ struct eth_addr arp_tha; /* ARP/ND target hardware address. */ ovs_be16 tcp_flags; /* TCP flags. With L3 to avoid matching L4. */ ovs_be16 pad3; /* Pad to 64 bits. 
*/ /* L4 (64-bit aligned) */ ovs_be16 tp_src; /* TCP/UDP/SCTP source port/ICMP type. */ ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port/ICMP code. */ ovs_be32 igmp_group_ip4; /* IGMP group IPv4 address. * Keep last for BUILD_ASSERT_DECL below. */ };
(3) 然后调用static int process_upcall(struct udpif *udpif, struct upcall *upcall, struct ofpbuf *odp_actions, struct flow_wildcards *wc)来处理upcall。
对于MISS_UPCALL,调用static void upcall_xlate(struct udpif *udpif, struct upcall *upcall, struct ofpbuf *odp_actions, struct flow_wildcards *wc)
switch (classify_upcall(upcall->type, userdata)) { case MISS_UPCALL: upcall_xlate(udpif, upcall, odp_actions, wc); return 0;
会调用enum xlate_error xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
在这个函数里面,会在flow table里面查找rule
ctx.rule = rule_dpif_lookup_from_table( ctx.xbridge->ofproto, ctx.tables_version, flow, xin->wc, ctx.xin->resubmit_stats, &ctx.table_id, flow->in_port.ofp_port, true, true);
找到rule之后,调用static void do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, struct xlate_ctx *ctx)。在这个函数里面,会根据action的不同,修改flow的内容。
switch (a->type) { case OFPACT_OUTPUT: xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port, ofpact_get_OUTPUT(a)->max_len, true); break; case OFPACT_SET_VLAN_VID: wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI); if (flow->vlan_tci & htons(VLAN_CFI) || ofpact_get_SET_VLAN_VID(a)->push_vlan_if_needed) { flow->vlan_tci &= ~htons(VLAN_VID_MASK); flow->vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid) | htons(VLAN_CFI)); } break; case OFPACT_SET_ETH_SRC: WC_MASK_FIELD(wc, dl_src); flow->dl_src = ofpact_get_SET_ETH_SRC(a)->mac; break; case OFPACT_SET_ETH_DST: WC_MASK_FIELD(wc, dl_dst); flow->dl_dst = ofpact_get_SET_ETH_DST(a)->mac; break; case OFPACT_SET_IPV4_SRC: CHECK_MPLS_RECIRCULATION(); if (flow->dl_type == htons(ETH_TYPE_IP)) { memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src); flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4; } break; case OFPACT_SET_IPV4_DST: CHECK_MPLS_RECIRCULATION(); if (flow->dl_type == htons(ETH_TYPE_IP)) { memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst); flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4; } break; case OFPACT_SET_L4_SRC_PORT: CHECK_MPLS_RECIRCULATION(); if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) { memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src); flow->tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port); } break; case OFPACT_SET_L4_DST_PORT: CHECK_MPLS_RECIRCULATION(); if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) { memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst); flow->tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port); } break;
(4) 最后调用static void handle_upcalls(struct udpif *udpif, struct upcall *upcalls, size_t n_upcalls)将flow rule添加到内核中的datapath
它会调用void dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops),后者再调用dpif->dpif_class->operate(dpif, ops, chunk);
会调用dpif_netlink_operate()
/* Executes the 'n_ops' operations in 'ops' on 'dpif_'.
 *
 * The work is handed to dpif_netlink_operate__() in chunks: each call
 * returns how many operations it consumed from the front of 'ops', and the
 * loop advances past them until none remain.
 *
 * NOTE(review): this relies on dpif_netlink_operate__() returning a nonzero
 * count whenever 'n_ops' > 0; otherwise the loop would not terminate.
 * Confirm against that function's definition. */
static void
dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);

    while (n_ops > 0) {
        size_t chunk = dpif_netlink_operate__(dpif, ops, n_ops);

        ops += chunk;
        n_ops -= chunk;
    }
}
在static size_t dpif_netlink_operate__(struct dpif_netlink *dpif, struct dpif_op **ops, size_t n_ops)中,有以下的代码:
switch (op->type) { case DPIF_OP_FLOW_PUT: put = &op->u.flow_put; dpif_netlink_init_flow_put(dpif, put, &flow); if (put->stats) { flow.nlmsg_flags |= NLM_F_ECHO; aux->txn.reply = &aux->reply; } dpif_netlink_flow_to_ofpbuf(&flow, &aux->request); break; case DPIF_OP_FLOW_DEL: del = &op->u.flow_del; dpif_netlink_init_flow_del(dpif, del, &flow); if (del->stats) { flow.nlmsg_flags |= NLM_F_ECHO; aux->txn.reply = &aux->reply; } dpif_netlink_flow_to_ofpbuf(&flow, &aux->request); break;
会调用netlink修改内核中datapath的规则。