From ed686d02d34b1f7793be674b99cf2a127d09704d Mon Sep 17 00:00:00 2001 From: Divakar Date: Mon, 29 Aug 2016 22:32:00 +0530 Subject: [PATCH] Respond with stitched MAC for unicast ARP request When a unicast ARP request from a BMS behind a QFX is received on the fabric interface of a compute node, and the MX is in ECMP, the source IP lookup might hit a subnet route pointing to an ECMP nexthop. This is because there would not be any host route for the BMS in the inet table. This results in Vrouter responding with the Vhost MAC address even though the destination IP address is stitched. This behaviour exists to ensure that routing is forced if the packet is from an ECMP source, even though the destination is in the same subnet. But this behaviour creates issues for a BMS behind a QFX if the BMS refreshes ARP with a unicast ARP request. As a fix, multicast ARP requests from an ECMP source on the fabric interface are dropped. If the ARP request is unicast and the destination MAC address of the ethernet packet is the stitched MAC, the ARP reply is sent with the stitched MAC. If the destination MAC address does not match the stitched MAC, that ARP request is not processed by Vrouter. 
closes-bug: #1594165 Change-Id: I72f4c44329b14c589343b30b84181cc8e0e05e81 --- dp-core/vr_datapath.c | 73 +++++++++++++++++++++++++++++++++--------- dp-core/vr_nexthop.c | 26 ++++++++------- dp-core/vr_proto_ip6.c | 5 ++- include/vr_datapath.h | 5 +-- 4 files changed, 80 insertions(+), 29 deletions(-) diff --git a/dp-core/vr_datapath.c b/dp-core/vr_datapath.c index 0f051b61d..6a1e33daf 100644 --- a/dp-core/vr_datapath.c +++ b/dp-core/vr_datapath.c @@ -20,14 +20,15 @@ vr_get_proxy_mac(struct vr_packet *pkt, struct vr_forwarding_md *fmd, struct vr_route_req *rt, unsigned char *dmac) { bool from_fabric, stitched, flood; - bool to_gateway, no_proxy, to_vcp; + bool to_gateway, no_proxy, to_vcp, ecmp_src; unsigned char *resp_mac; struct vr_nexthop *nh = NULL; struct vr_interface *vif = pkt->vp_if; struct vr_vrf_stats *stats; - from_fabric = stitched = flood = to_gateway = to_vcp = no_proxy = false; + from_fabric = stitched = flood = false; + to_gateway = to_vcp = no_proxy = ecmp_src = false; stats = vr_inet_vrf_stats(fmd->fmd_dvrf, pkt->vp_cpu); /* here we will not check for stats, but will check before use */ @@ -65,11 +66,12 @@ vr_get_proxy_mac(struct vr_packet *pkt, struct vr_forwarding_md *fmd, } } - /* If ECMP source, we force routing */ - if (fmd->fmd_ecmp_src_nh_index != -1) { - resp_mac = vif->vif_mac; - fmd->fmd_ecmp_src_nh_index = -1; - } + /* If ECMP source, we force routing */ + if (fmd->fmd_ecmp_src_nh_index != -1) { + resp_mac = vif->vif_mac; + fmd->fmd_ecmp_src_nh_index = -1; + ecmp_src = true; + } /* @@ -87,6 +89,38 @@ vr_get_proxy_mac(struct vr_packet *pkt, struct vr_forwarding_md *fmd, * . arp request from the uplink port of a vcp */ if (from_fabric) { + if (ecmp_src) { + + /* + * If a Multicast ARP request, it is not answered on Fabric + * side + */ + if (IS_MAC_BMCAST(dmac)) + return MR_DROP; + + /* + * If unicast and not stiched, we do not have enough + * information what to respond. 
We can not even flood, + * probably because this need to be answered with Vrouter + * Mac in source Vrouter itself. So we drop this + */ + if (!stitched) + return MR_DROP; + + /* + * If our stiched mac does not match, we will let the VM + * decide what to do with request + */ + if (!VR_MAC_CMP(dmac, rt->rtr_req.rtr_mac)) + return MR_FLOOD; + + /* + * Very likely response need to go with stiched mac. But + * below conditions might override + */ + resp_mac = rt->rtr_req.rtr_mac; + } + if (flood && !stitched) { if (stats) stats->vrf_arp_physical_flood++; @@ -229,7 +263,7 @@ vr_arp_proxy(struct vr_arp *sarp, struct vr_packet *pkt, static int vr_handle_arp_request(struct vr_arp *sarp, struct vr_packet *pkt, - struct vr_forwarding_md *fmd) + struct vr_forwarding_md *fmd, unsigned char *eth_dmac) { bool handled = true; unsigned char dmac[VR_ETHER_ALEN]; @@ -237,6 +271,7 @@ vr_handle_arp_request(struct vr_arp *sarp, struct vr_packet *pkt, struct vr_packet *pkt_c; struct vr_interface *vif = pkt->vp_if; + VR_MAC_COPY(dmac, eth_dmac); arp_result = vif->vif_mac_request(vif, pkt, fmd, dmac); switch (arp_result) { @@ -386,6 +421,7 @@ vif_plug_mac_request(struct vr_interface *vif, struct vr_packet *pkt, struct vr_forwarding_md *fmd) { int nheader, handled = 1; + unsigned char eth_dmac[VR_ETHER_ALEN]; if (pkt->vp_flags & VP_FLAG_MULTICAST) goto unhandled; @@ -394,13 +430,15 @@ vif_plug_mac_request(struct vr_interface *vif, struct vr_packet *pkt, if (nheader < 0 || (pkt->vp_data + nheader > pkt->vp_end)) goto unhandled; + VR_MAC_COPY(eth_dmac, pkt_data(pkt)); + if (pkt->vp_type == VP_TYPE_ARP) { if (pkt->vp_len < (nheader + sizeof(struct vr_arp))) goto unhandled; pkt_pull(pkt, nheader); - handled = vr_arp_input(pkt, fmd); + handled = vr_arp_input(pkt, fmd, eth_dmac); if (!handled) { pkt_push(pkt, nheader); } @@ -413,7 +451,7 @@ vif_plug_mac_request(struct vr_interface *vif, struct vr_packet *pkt, pkt_pull(pkt, nheader); - handled = vr_neighbor_input(pkt, fmd); + handled = 
vr_neighbor_input(pkt, fmd, eth_dmac); if (!handled) { pkt_push(pkt, nheader); } @@ -468,7 +506,8 @@ vr_pkt_type(struct vr_packet *pkt, unsigned short offset, } int -vr_arp_input(struct vr_packet *pkt, struct vr_forwarding_md *fmd) +vr_arp_input(struct vr_packet *pkt, struct vr_forwarding_md *fmd, + unsigned char *eth_dmac) { int handled = 1; struct vr_arp sarp; @@ -486,7 +525,7 @@ vr_arp_input(struct vr_packet *pkt, struct vr_forwarding_md *fmd) switch (ntohs(sarp.arp_op)) { case VR_ARP_OP_REQUEST: - return vr_handle_arp_request(&sarp, pkt, fmd); + return vr_handle_arp_request(&sarp, pkt, fmd, eth_dmac); case VR_ARP_OP_REPLY: return vr_handle_arp_reply(&sarp, pkt, fmd); @@ -606,6 +645,7 @@ vr_fabric_input(struct vr_interface *vif, struct vr_packet *pkt, int handled = 0; unsigned short pull_len; struct vr_forwarding_md fmd; + unsigned char *data, eth_dmac[VR_ETHER_ALEN]; vr_init_forwarding_md(&fmd); fmd.fmd_vlan = vlan_id; @@ -628,13 +668,16 @@ vr_fabric_input(struct vr_interface *vif, struct vr_packet *pkt, return vif_xconnect(vif, pkt, &fmd); } + data = pkt_data(pkt); pull_len = pkt_get_network_header_off(pkt) - pkt_head_space(pkt); pkt_pull(pkt, pull_len); - if (pkt->vp_type == VP_TYPE_IP || pkt->vp_type == VP_TYPE_IP6) + if (pkt->vp_type == VP_TYPE_IP || pkt->vp_type == VP_TYPE_IP6) { handled = vr_l3_input(pkt, &fmd); - else if (pkt->vp_type == VP_TYPE_ARP) - handled = vr_arp_input(pkt, &fmd); + } else if (pkt->vp_type == VP_TYPE_ARP) { + VR_MAC_COPY(eth_dmac, data); + handled = vr_arp_input(pkt, &fmd, eth_dmac); + } if (!handled) { pkt_push(pkt, pull_len); diff --git a/dp-core/vr_nexthop.c b/dp-core/vr_nexthop.c index d8e91cc27..d38766cc3 100644 --- a/dp-core/vr_nexthop.c +++ b/dp-core/vr_nexthop.c @@ -281,13 +281,15 @@ static int nh_l2_rcv(struct vr_packet *pkt, struct vr_nexthop *nh, struct vr_forwarding_md *fmd) { - struct vr_vrf_stats *stats; + unsigned char eth_dmac[VR_ETHER_ALEN], *data; int pull_len, handled = 0; + struct vr_vrf_stats *stats; stats = 
vr_inet_vrf_stats(fmd->fmd_dvrf, pkt->vp_cpu); if (stats) stats->vrf_l2_receives++; + data = pkt_data(pkt); fmd->fmd_to_me = 1; pull_len = pkt_get_network_header_off(pkt) - pkt_head_space(pkt); if (pkt_pull(pkt, pull_len) < 0) { @@ -301,13 +303,15 @@ nh_l2_rcv(struct vr_packet *pkt, struct vr_nexthop *nh, * generated by Agent for some features */ if (pkt->vp_type == VP_TYPE_IP6) { - handled = vr_neighbor_input(pkt, fmd); + VR_MAC_COPY(eth_dmac, data); + handled = vr_neighbor_input(pkt, fmd, eth_dmac); if (!handled) handled = vr_l3_input(pkt, fmd); } else if (pkt->vp_type == VP_TYPE_IP) { handled = vr_l3_input(pkt, fmd); } else if (pkt->vp_type == VP_TYPE_ARP) { - handled = vr_arp_input(pkt, fmd); + VR_MAC_COPY(eth_dmac, data); + handled = vr_arp_input(pkt, fmd, eth_dmac); } if (!handled) @@ -789,13 +793,13 @@ nh_composite_mcast_validate_src(struct vr_packet *pkt, struct vr_nexthop *nh, } static int -nh_handle_mcast_control_pkt(struct vr_packet *pkt, struct vr_forwarding_md *fmd, - unsigned int pkt_src, bool *flood_to_vms) +nh_handle_mcast_control_pkt(struct vr_packet *pkt, struct vr_eth *eth, + struct vr_forwarding_md *fmd, unsigned int pkt_src, bool *flood_to_vms) { int handled = 1; + unsigned char eth_dmac[VR_ETHER_ALEN]; unsigned short trap, rt_flags, drop_reason, pull_len = 0; l4_pkt_type_t l4_type = L4_TYPE_UNKNOWN; - struct vr_eth *eth; struct vr_arp *sarp; struct vr_nexthop *src_nh; struct vr_ip6 *ip6; @@ -806,8 +810,6 @@ nh_handle_mcast_control_pkt(struct vr_packet *pkt, struct vr_forwarding_md *fmd, if (fmd->fmd_vlan != VLAN_ID_INVALID) return !handled; - eth = (struct vr_eth *)pkt_data(pkt); - pull_len = pkt_get_network_header_off(pkt) - pkt_head_space(pkt); if (pkt_pull(pkt, pull_len) < 0) { drop_reason = VP_DROP_PULL; @@ -815,7 +817,8 @@ nh_handle_mcast_control_pkt(struct vr_packet *pkt, struct vr_forwarding_md *fmd, } if (pkt->vp_type == VP_TYPE_ARP) { - handled = vr_arp_input(pkt, fmd); + VR_MAC_COPY(eth_dmac, eth->eth_dmac); + handled = 
vr_arp_input(pkt, fmd, eth_dmac); if (handled) return handled; @@ -880,7 +883,8 @@ nh_handle_mcast_control_pkt(struct vr_packet *pkt, struct vr_forwarding_md *fmd, } if (l4_type == L4_TYPE_NEIGHBOUR_SOLICITATION) { - handled = vr_neighbor_input(pkt, fmd); + VR_MAC_COPY(eth_dmac, eth->eth_dmac); + handled = vr_neighbor_input(pkt, fmd, eth_dmac); if (handled) return handled; @@ -966,7 +970,7 @@ nh_composite_mcast_l2(struct vr_packet *pkt, struct vr_nexthop *nh, goto drop; } - handled = nh_handle_mcast_control_pkt(pkt, fmd, pkt_src, &flood_to_vms); + handled = nh_handle_mcast_control_pkt(pkt, eth, fmd, pkt_src, &flood_to_vms); if (handled) return 0; diff --git a/dp-core/vr_proto_ip6.c b/dp-core/vr_proto_ip6.c index 49df82409..090aa2360 100644 --- a/dp-core/vr_proto_ip6.c +++ b/dp-core/vr_proto_ip6.c @@ -416,7 +416,8 @@ vm_neighbor_request(struct vr_interface *vif, struct vr_packet *pkt, } int -vr_neighbor_input(struct vr_packet *pkt, struct vr_forwarding_md *fmd) +vr_neighbor_input(struct vr_packet *pkt, struct vr_forwarding_md *fmd, + unsigned char *eth_dmac) { int handled = 1; uint32_t pull_len, len; @@ -465,6 +466,8 @@ vr_neighbor_input(struct vr_packet *pkt, struct vr_forwarding_md *fmd) if (nopt->vno_type != SOURCE_LINK_LAYER_ADDRESS_OPTION) goto drop; + VR_MAC_COPY(dmac, eth_dmac); + ndisc_result = vif->vif_mac_request(vif, pkt, fmd, dmac); switch (ndisc_result) { case MR_PROXY: diff --git a/include/vr_datapath.h b/include/vr_datapath.h index ad3dd4ecc..4e2272c0e 100644 --- a/include/vr_datapath.h +++ b/include/vr_datapath.h @@ -27,10 +27,11 @@ unsigned int vr_fabric_input(struct vr_interface *, struct vr_packet *, int vr_l3_input(struct vr_packet *, struct vr_forwarding_md *); int vr_l2_input(struct vr_packet *, struct vr_forwarding_md *); -int vr_arp_input(struct vr_packet *, struct vr_forwarding_md *); +int vr_arp_input(struct vr_packet *, struct vr_forwarding_md *, unsigned char *); int vr_ip_input(struct vrouter *, struct vr_packet *, struct 
vr_forwarding_md *); -int vr_neighbor_input(struct vr_packet *, struct vr_forwarding_md *); +int vr_neighbor_input(struct vr_packet *, struct vr_forwarding_md *, + unsigned char *); int vr_ip6_input(struct vrouter *, struct vr_packet *, struct vr_forwarding_md *); extern void vr_ip_update_csum(struct vr_packet *, unsigned int, unsigned int);