From 5ee57ac18e896711de0f256348eaacfeb402c374 Mon Sep 17 00:00:00 2001 From: "Anand H. Krishnan" Date: Wed, 6 Apr 2016 11:27:31 +0530 Subject: [PATCH] ECMP without flow Currently ECMP does not work if the packets do not transit the flow table (i.e.: if there is no policy that guides the packet). The requirement for flow lookup comes from the fact that flows need to follow the same path (also called flow stickiness) regardless of existing destinations leaving and new destinations entering, which affects the list of ECMP destinations, and thus the distribution. To work around that problem, we maintain an index in the flow entry that points to the correct end point in the distribution list. For non-policy looked up packets, the requirement of following the same path in case of transitions is dropped. Change-Id: Ib7688c9dcac522025196334f469844c79ebdf095 Partial-BUG: #1566650 --- dp-core/vr_nexthop.c | 91 ++++++++++++++++++++++++++++++++++++++++-- dp-core/vr_proto_ip.c | 20 ++++++++++ dp-core/vr_proto_ip6.c | 2 +- include/vr_flow.h | 5 +++ include/vr_nexthop.h | 7 +++- 5 files changed, 119 insertions(+), 6 deletions(-) diff --git a/dp-core/vr_nexthop.c b/dp-core/vr_nexthop.c index 39d8c68f3..6a1ea7f5c 100644 --- a/dp-core/vr_nexthop.c +++ b/dp-core/vr_nexthop.c @@ -80,6 +80,13 @@ vrouter_free_nexthop(struct vr_nexthop *nh) vr_free(nh->nh_component_nh, VR_NEXTHOP_COMPONENT_OBJECT); nh->nh_component_nh = NULL; } + + if (nh->nh_component_ecmp) { + nh->nh_component_ecmp_cnt = 0; + vr_free(nh->nh_component_ecmp, VR_NEXTHOP_COMPONENT_OBJECT); + nh->nh_component_ecmp = NULL; + } + } else if ((nh->nh_type == NH_TUNNEL) && (nh->nh_flags & NH_FLAG_TUNNEL_UDP) && (nh->nh_family == AF_INET6)) { @@ -584,6 +591,47 @@ nh_composite_ecmp_validate_src(struct vr_packet *pkt, struct vr_nexthop *nh, return NH_SOURCE_VALID; } +static struct vr_nexthop * +nh_composite_ecmp_select_nh(struct vr_packet *pkt, struct vr_nexthop *nh, + struct vr_forwarding_md *fmd) +{ + int ret; + unsigned int hash, 
hash_ecmp, count; + + struct vr_flow flow; + struct vr_ip6 *ip6; + struct vr_nexthop *cnh = NULL; + struct vr_component_nh *cnhp = nh->nh_component_nh; + struct vr_component_nh *cnhp_ecmp = nh->nh_component_ecmp; + + if (!(count = nh->nh_component_cnt)) + return NULL; + + if (pkt->vp_type == VP_TYPE_IP) { + ret = vr_inet_get_flow_key(nh->nh_router, pkt, fmd, &flow); + if (ret < 0) + return NULL; + } else if (pkt->vp_type == VP_TYPE_IP6) { + ip6 = (struct vr_ip6 *)pkt_network_header(pkt); + ret = vr_inet6_form_flow(nh->nh_router, fmd->fmd_dvrf, pkt, + fmd->fmd_vlan, ip6, &flow); + if (ret < 0) + return NULL; + } else { + return NULL; + } + + hash = hash_ecmp = vr_hash(&flow, flow.flow_key_len, 0); + hash %= count; + cnh = cnhp[hash].cnh; + if (!cnh && nh->nh_component_ecmp_cnt) { + hash_ecmp %= nh->nh_component_ecmp_cnt; + cnh = cnhp_ecmp[hash_ecmp].cnh; + } + + return cnh; +} + static int nh_composite_ecmp(struct vr_packet *pkt, struct vr_nexthop *nh, struct vr_forwarding_md *fmd) @@ -600,12 +648,20 @@ nh_composite_ecmp(struct vr_packet *pkt, struct vr_nexthop *nh, if (!fmd || fmd->fmd_ecmp_nh_index >= (short)nh->nh_component_cnt) goto drop; - if (fmd->fmd_ecmp_nh_index >= 0) + if (fmd->fmd_ecmp_nh_index >= 0) { member_nh = nh->nh_component_nh[fmd->fmd_ecmp_nh_index].cnh; + } else if (fmd->fmd_flow_index < 0) { + member_nh = nh_composite_ecmp_select_nh(pkt, nh, fmd); + } if (!member_nh) { - vr_trap(pkt, fmd->fmd_dvrf, AGENT_TRAP_ECMP_RESOLVE, &fmd->fmd_flow_index); - return 0; + if (fmd->fmd_flow_index < 0) { + vr_pfree(pkt, VP_DROP_INVALID_NH); + return 0; + } else { + vr_trap(pkt, fmd->fmd_dvrf, AGENT_TRAP_ECMP_RESOLVE, &fmd->fmd_flow_index); + return 0; + } } vr_forwarding_md_set_label(fmd, @@ -2150,7 +2206,7 @@ nh_composite_mcast_validate(struct vr_nexthop *nh, vr_nexthop_req *req) static int nh_composite_add(struct vr_nexthop *nh, vr_nexthop_req *req) { - unsigned int i; + unsigned int i, j = 0, active = 0; struct vr_nexthop *tmp_nh; nh->nh_validate_src = 
NULL; @@ -2163,6 +2219,11 @@ nh_composite_add(struct vr_nexthop *nh, vr_nexthop_req *req) vr_free(nh->nh_component_nh, VR_NEXTHOP_COMPONENT_OBJECT); nh->nh_component_nh = NULL; nh->nh_component_cnt = 0; + + if (nh->nh_component_ecmp) { + vr_free(nh->nh_component_ecmp, VR_NEXTHOP_COMPONENT_OBJECT); + nh->nh_component_ecmp = NULL; + } } if (req->nhr_nh_list_size != req->nhr_label_list_size) @@ -2181,6 +2242,8 @@ nh_composite_add(struct vr_nexthop *nh, vr_nexthop_req *req) nh->nh_component_nh[i].cnh = vrouter_get_nexthop(req->nhr_rid, req->nhr_nh_list[i]); nh->nh_component_nh[i].cnh_label = req->nhr_label_list[i]; + if (nh->nh_component_nh[i].cnh) + active++; } nh->nh_component_cnt = req->nhr_nh_list_size; @@ -2194,6 +2257,21 @@ nh_composite_add(struct vr_nexthop *nh, vr_nexthop_req *req) } else if (req->nhr_flags & NH_FLAG_COMPOSITE_ECMP) { nh->nh_reach_nh = nh_composite_ecmp; nh->nh_validate_src = nh_composite_ecmp_validate_src; + if (active) { + nh->nh_component_ecmp = vr_zalloc(active * + sizeof(struct vr_component_nh), VR_NEXTHOP_COMPONENT_OBJECT); + if (!nh->nh_component_ecmp) { + goto error; + } + + for (i = 0; i < req->nhr_nh_list_size; i++) { + if (nh->nh_component_nh[i].cnh) { + memcpy(&nh->nh_component_ecmp[j++], &nh->nh_component_nh[i], + sizeof(struct vr_component_nh)); + } + } + nh->nh_component_ecmp_cnt = j; + } } else if (req->nhr_flags & NH_FLAG_COMPOSITE_FABRIC) { nh->nh_reach_nh = nh_composite_fabric; } else if (req->nhr_flags & NH_FLAG_COMPOSITE_EVPN) { @@ -2215,6 +2293,11 @@ nh_composite_add(struct vr_nexthop *nh, vr_nexthop_req *req) } vr_free(nh->nh_component_nh, VR_NEXTHOP_COMPONENT_OBJECT); + if (nh->nh_component_ecmp) { + vr_free(nh->nh_component_ecmp, VR_NEXTHOP_COMPONENT_OBJECT); + nh->nh_component_ecmp = NULL; + } + nh->nh_component_nh = NULL; nh->nh_component_cnt = 0; } diff --git a/dp-core/vr_proto_ip.c b/dp-core/vr_proto_ip.c index 717cb414c..ab9a97c46 100644 --- a/dp-core/vr_proto_ip.c +++ b/dp-core/vr_proto_ip.c @@ -949,6 +949,26 @@ 
vr_inet_should_trap(struct vr_packet *pkt, struct vr_flow *flow_p) return false; } +int +vr_inet_get_flow_key(struct vrouter *router, struct vr_packet *pkt, + struct vr_forwarding_md *fmd, struct vr_flow *flow) +{ + int ret; + struct vr_ip *ip; + + ret = vr_inet_form_flow(router, fmd->fmd_dvrf, pkt, fmd->fmd_vlan, flow); + if (ret < 0) + return ret; + + ip = (struct vr_ip *)pkt_network_header(pkt); + if (vr_ip_fragment_head(ip)) { + vr_fragment_add(router, fmd->fmd_dvrf, ip, flow->flow4_sport, + flow->flow4_dport); + } + + return 0; +} + flow_result_t vr_inet_flow_lookup(struct vrouter *router, struct vr_packet *pkt, struct vr_forwarding_md *fmd) diff --git a/dp-core/vr_proto_ip6.c b/dp-core/vr_proto_ip6.c index 6233c92d0..e104f1679 100644 --- a/dp-core/vr_proto_ip6.c +++ b/dp-core/vr_proto_ip6.c @@ -168,7 +168,7 @@ vr_inet6_flow_is_fat_flow(struct vrouter *router, struct vr_packet *pkt, return false; } -static int +int vr_inet6_form_flow(struct vrouter *router, unsigned short vrf, struct vr_packet *pkt, uint16_t vlan, struct vr_ip6 *ip6, struct vr_flow *flow_p) diff --git a/include/vr_flow.h b/include/vr_flow.h index a4185e385..911af7f9e 100644 --- a/include/vr_flow.h +++ b/include/vr_flow.h @@ -382,6 +382,7 @@ typedef enum { struct vr_packet; struct vrouter; +struct vr_ip6; extern int vr_flow_init(struct vrouter *); extern void vr_flow_exit(struct vrouter *, bool); @@ -401,6 +402,8 @@ flow_result_t vr_inet_flow_lookup(struct vrouter *, struct vr_packet *, struct vr_forwarding_md *); flow_result_t vr_inet6_flow_lookup(struct vrouter *, struct vr_packet *, struct vr_forwarding_md *); +int vr_inet6_form_flow(struct vrouter *, unsigned short, struct vr_packet *, + uint16_t, struct vr_ip6 *, struct vr_flow *); unsigned short vr_inet_flow_nexthop(struct vr_packet *pkt, unsigned short vlan); @@ -415,6 +418,8 @@ extern bool vr_inet_flow_is_fat_flow(struct vrouter *, struct vr_packet *, extern bool vr_inet6_flow_is_fat_flow(struct vrouter *, struct vr_packet *, struct 
vr_flow_entry *); extern bool vr_inet_flow_allow_new_flow(struct vrouter *, struct vr_packet *); +extern int vr_inet_get_flow_key(struct vrouter *, struct vr_packet *, + struct vr_forwarding_md *, struct vr_flow *); extern unsigned int vr_reinject_packet(struct vr_packet *, struct vr_forwarding_md *); diff --git a/include/vr_nexthop.h b/include/vr_nexthop.h index bf19d1dae..6f379e247 100644 --- a/include/vr_nexthop.h +++ b/include/vr_nexthop.h @@ -80,6 +80,7 @@ struct vr_nexthop { * nexthops */ uint8_t nh_family; + uint16_t nh_data_size; uint32_t nh_flags; int nh_vrf; unsigned int nh_id; @@ -115,12 +116,13 @@ struct vr_nexthop { struct { unsigned short cnt; + unsigned short ecmp_cnt; struct vr_component_nh *component; + struct vr_component_nh *ecmp_active; } nh_composite; } nh_u; - uint16_t nh_data_size; struct vrouter *nh_router; int (*nh_validate_src)(struct vr_packet *, struct vr_nexthop *, @@ -156,6 +158,9 @@ struct vr_nexthop { #define nh_component_cnt nh_u.nh_composite.cnt #define nh_component_nh nh_u.nh_composite.component +#define nh_component_ecmp_cnt nh_u.nh_composite.ecmp_cnt +#define nh_component_ecmp nh_u.nh_composite.ecmp_active + static inline bool vr_nexthop_is_vcp(struct vr_nexthop *nh)