diff --git a/dp-core/vr_nexthop.c b/dp-core/vr_nexthop.c
index 39d8c68f3..6a1ea7f5c 100644
--- a/dp-core/vr_nexthop.c
+++ b/dp-core/vr_nexthop.c
@@ -80,6 +80,13 @@ vrouter_free_nexthop(struct vr_nexthop *nh)
             vr_free(nh->nh_component_nh, VR_NEXTHOP_COMPONENT_OBJECT);
             nh->nh_component_nh = NULL;
         }
+
+        if (nh->nh_component_ecmp) {
+            nh->nh_component_ecmp_cnt = 0;
+            vr_free(nh->nh_component_ecmp, VR_NEXTHOP_COMPONENT_OBJECT);
+            nh->nh_component_ecmp = NULL;
+        }
+
     } else if ((nh->nh_type == NH_TUNNEL) &&
             (nh->nh_flags & NH_FLAG_TUNNEL_UDP) &&
             (nh->nh_family == AF_INET6)) {
@@ -584,6 +591,65 @@ nh_composite_ecmp_validate_src(struct vr_packet *pkt, struct vr_nexthop *nh,
     return NH_SOURCE_VALID;
 }
 
+/*
+ * nh_composite_ecmp_select_nh - choose an ECMP member by hashing the flow
+ * key of a packet that carries no flow index.
+ *
+ * The hash first indexes the full component list. If that slot holds no
+ * nexthop, the same hash is folded over nh_component_ecmp, the compacted
+ * list of non-NULL members built in nh_composite_add().
+ *
+ * Returns the selected member, or NULL when the composite has no
+ * components, the packet is neither IPv4 nor IPv6, the flow key cannot
+ * be formed, or there is no active member to fall back to.
+ */
+static struct vr_nexthop *
+nh_composite_ecmp_select_nh(struct vr_packet *pkt, struct vr_nexthop *nh,
+        struct vr_forwarding_md *fmd)
+{
+    int ret;
+    unsigned int hash, count, ecmp_count;
+
+    struct vr_flow flow;
+    struct vr_ip6 *ip6;
+    struct vr_nexthop *cnh;
+    struct vr_component_nh *cnhp = nh->nh_component_nh;
+    struct vr_component_nh *cnhp_ecmp = nh->nh_component_ecmp;
+
+    count = nh->nh_component_cnt;
+    if (!count || !cnhp)
+        return NULL;
+
+    if (pkt->vp_type == VP_TYPE_IP) {
+        ret = vr_inet_get_flow_key(nh->nh_router, pkt, fmd, &flow);
+        if (ret < 0)
+            return NULL;
+    } else if (pkt->vp_type == VP_TYPE_IP6) {
+        ip6 = (struct vr_ip6 *)pkt_network_header(pkt);
+        ret = vr_inet6_form_flow(nh->nh_router, fmd->fmd_dvrf, pkt,
+                fmd->fmd_vlan, ip6, &flow);
+        if (ret < 0)
+            return NULL;
+    } else {
+        return NULL;
+    }
+
+    hash = vr_hash(&flow, flow.flow_key_len, 0);
+    cnh = cnhp[hash % count].cnh;
+    if (cnh)
+        return cnh;
+
+    /*
+     * Check the pointer as well as the count, so that a non-zero count
+     * can never be used to index a NULL list.
+     */
+    ecmp_count = nh->nh_component_ecmp_cnt;
+    if (!cnhp_ecmp || !ecmp_count)
+        return NULL;
+
+    return cnhp_ecmp[hash % ecmp_count].cnh;
+}
+
 static int
 nh_composite_ecmp(struct vr_packet *pkt, struct vr_nexthop *nh,
         struct vr_forwarding_md *fmd)
@@ -600,12 +666,20 @@ nh_composite_ecmp(struct vr_packet *pkt, struct vr_nexthop *nh,
     if (!fmd || fmd->fmd_ecmp_nh_index >= (short)nh->nh_component_cnt)
         goto drop;
 
-    if (fmd->fmd_ecmp_nh_index >= 0)
+    if (fmd->fmd_ecmp_nh_index >= 0) {
         member_nh = nh->nh_component_nh[fmd->fmd_ecmp_nh_index].cnh;
+    } else if (fmd->fmd_flow_index < 0) {
+        member_nh = nh_composite_ecmp_select_nh(pkt, nh, fmd);
+    }
 
     if (!member_nh) {
-        vr_trap(pkt, fmd->fmd_dvrf, AGENT_TRAP_ECMP_RESOLVE, &fmd->fmd_flow_index);
-        return 0;
+        if (fmd->fmd_flow_index < 0) {
+            vr_pfree(pkt, VP_DROP_INVALID_NH);
+            return 0;
+        } else {
+            vr_trap(pkt, fmd->fmd_dvrf, AGENT_TRAP_ECMP_RESOLVE, &fmd->fmd_flow_index);
+            return 0;
+        }
     }
 
     vr_forwarding_md_set_label(fmd,
@@ -2150,7 +2224,7 @@ nh_composite_mcast_validate(struct vr_nexthop *nh, vr_nexthop_req *req)
 static int
 nh_composite_add(struct vr_nexthop *nh, vr_nexthop_req *req)
 {
-    unsigned int i;
+    unsigned int i, j = 0, active = 0;
     struct vr_nexthop *tmp_nh;
 
     nh->nh_validate_src = NULL;
@@ -2163,6 +2237,13 @@ nh_composite_add(struct vr_nexthop *nh, vr_nexthop_req *req)
         vr_free(nh->nh_component_nh, VR_NEXTHOP_COMPONENT_OBJECT);
         nh->nh_component_nh = NULL;
         nh->nh_component_cnt = 0;
+
+        /* reset the count with the list so the two never disagree */
+        nh->nh_component_ecmp_cnt = 0;
+        if (nh->nh_component_ecmp) {
+            vr_free(nh->nh_component_ecmp, VR_NEXTHOP_COMPONENT_OBJECT);
+            nh->nh_component_ecmp = NULL;
+        }
     }
 
     if (req->nhr_nh_list_size != req->nhr_label_list_size)
@@ -2181,6 +2262,8 @@ nh_composite_add(struct vr_nexthop *nh, vr_nexthop_req *req)
         nh->nh_component_nh[i].cnh = vrouter_get_nexthop(req->nhr_rid,
                 req->nhr_nh_list[i]);
         nh->nh_component_nh[i].cnh_label = req->nhr_label_list[i];
+        if (nh->nh_component_nh[i].cnh)
+            active++;
     }
 
     nh->nh_component_cnt = req->nhr_nh_list_size;
@@ -2194,6 +2277,22 @@ nh_composite_add(struct vr_nexthop *nh, vr_nexthop_req *req)
     } else if (req->nhr_flags & NH_FLAG_COMPOSITE_ECMP) {
         nh->nh_reach_nh = nh_composite_ecmp;
         nh->nh_validate_src = nh_composite_ecmp_validate_src;
+        if (active) {
+            /* compact copy of the resolved members, for hash fallback */
+            nh->nh_component_ecmp = vr_zalloc(active *
+                    sizeof(struct vr_component_nh), VR_NEXTHOP_COMPONENT_OBJECT);
+            if (!nh->nh_component_ecmp) {
+                goto error;
+            }
+
+            for (i = 0; i < req->nhr_nh_list_size; i++) {
+                if (nh->nh_component_nh[i].cnh) {
+                    memcpy(&nh->nh_component_ecmp[j++], &nh->nh_component_nh[i],
+                            sizeof(struct vr_component_nh));
+                }
+            }
+            nh->nh_component_ecmp_cnt = j;
+        }
     } else if (req->nhr_flags & NH_FLAG_COMPOSITE_FABRIC) {
         nh->nh_reach_nh = nh_composite_fabric;
     } else if (req->nhr_flags & NH_FLAG_COMPOSITE_EVPN) {
@@ -2215,6 +2314,12 @@ nh_composite_add(struct vr_nexthop *nh, vr_nexthop_req *req)
         }
 
         vr_free(nh->nh_component_nh, VR_NEXTHOP_COMPONENT_OBJECT);
+        nh->nh_component_ecmp_cnt = 0;
+        if (nh->nh_component_ecmp) {
+            vr_free(nh->nh_component_ecmp, VR_NEXTHOP_COMPONENT_OBJECT);
+            nh->nh_component_ecmp = NULL;
+        }
+
         nh->nh_component_nh = NULL;
         nh->nh_component_cnt = 0;
     }
diff --git a/dp-core/vr_proto_ip.c b/dp-core/vr_proto_ip.c
index 717cb414c..ab9a97c46 100644
--- a/dp-core/vr_proto_ip.c
+++ b/dp-core/vr_proto_ip.c
@@ -949,6 +949,35 @@ vr_inet_should_trap(struct vr_packet *pkt, struct vr_flow *flow_p)
     return false;
 }
 
+/*
+ * vr_inet_get_flow_key - form the IPv4 flow key for a packet.
+ *
+ * Fills @flow through vr_inet_form_flow(). When the packet is the head
+ * fragment of a datagram, its ports are also handed to vr_fragment_add().
+ *
+ * Returns 0 on success, or the negative value returned by
+ * vr_inet_form_flow().
+ */
+int
+vr_inet_get_flow_key(struct vrouter *router, struct vr_packet *pkt,
+        struct vr_forwarding_md *fmd, struct vr_flow *flow)
+{
+    int ret;
+    struct vr_ip *ip;
+
+    ret = vr_inet_form_flow(router, fmd->fmd_dvrf, pkt, fmd->fmd_vlan, flow);
+    if (ret < 0)
+        return ret;
+
+    ip = (struct vr_ip *)pkt_network_header(pkt);
+    if (vr_ip_fragment_head(ip)) {
+        vr_fragment_add(router, fmd->fmd_dvrf, ip, flow->flow4_sport,
+                flow->flow4_dport);
+    }
+
+    return 0;
+}
+
 flow_result_t
 vr_inet_flow_lookup(struct vrouter *router, struct vr_packet *pkt,
         struct vr_forwarding_md *fmd)
diff --git a/dp-core/vr_proto_ip6.c b/dp-core/vr_proto_ip6.c
index 6233c92d0..e104f1679 100644
--- a/dp-core/vr_proto_ip6.c
+++ b/dp-core/vr_proto_ip6.c
@@ -168,7 +168,7 @@ vr_inet6_flow_is_fat_flow(struct vrouter *router, struct vr_packet *pkt,
     return false;
 }
 
-static int
+int
 vr_inet6_form_flow(struct vrouter *router, unsigned short vrf,
         struct vr_packet *pkt, uint16_t vlan, struct vr_ip6 *ip6,
         struct vr_flow *flow_p)
diff --git a/include/vr_flow.h b/include/vr_flow.h
index a4185e385..911af7f9e 100644
--- a/include/vr_flow.h
+++ b/include/vr_flow.h
@@ -382,6 +382,7 @@ typedef enum {
 
 struct vr_packet;
 struct vrouter;
+struct vr_ip6;
 
 extern int vr_flow_init(struct vrouter *);
 extern void vr_flow_exit(struct vrouter *, bool);
@@ -401,6 +402,8 @@ flow_result_t vr_inet_flow_lookup(struct vrouter *, struct vr_packet *,
         struct vr_forwarding_md *);
 flow_result_t vr_inet6_flow_lookup(struct vrouter *, struct vr_packet *,
         struct vr_forwarding_md *);
+int vr_inet6_form_flow(struct vrouter *, unsigned short, struct vr_packet *,
+        uint16_t, struct vr_ip6 *, struct vr_flow *);
 
 unsigned short vr_inet_flow_nexthop(struct vr_packet *pkt,
         unsigned short vlan);
@@ -415,6 +418,8 @@ extern bool vr_inet_flow_is_fat_flow(struct vrouter *, struct vr_packet *,
 extern bool vr_inet6_flow_is_fat_flow(struct vrouter *, struct vr_packet *,
         struct vr_flow_entry *);
 extern bool vr_inet_flow_allow_new_flow(struct vrouter *, struct vr_packet *);
+extern int vr_inet_get_flow_key(struct vrouter *, struct vr_packet *,
+        struct vr_forwarding_md *, struct vr_flow *);
 
 extern unsigned int vr_reinject_packet(struct vr_packet *,
         struct vr_forwarding_md *);
diff --git a/include/vr_nexthop.h b/include/vr_nexthop.h
index bf19d1dae..6f379e247 100644
--- a/include/vr_nexthop.h
+++ b/include/vr_nexthop.h
@@ -80,6 +80,7 @@ struct vr_nexthop {
      * nexthops
      */
     uint8_t         nh_family;
+    uint16_t        nh_data_size;
     uint32_t        nh_flags;
     int             nh_vrf;
     unsigned int    nh_id;
@@ -115,12 +116,13 @@ struct vr_nexthop {
 
         struct {
             unsigned short cnt;
+            unsigned short ecmp_cnt;
             struct vr_component_nh *component;
+            struct vr_component_nh *ecmp_active;
         } nh_composite;
 
     } nh_u;
 
-    uint16_t        nh_data_size;
     struct vrouter  *nh_router;
     int             (*nh_validate_src)(struct vr_packet *,
                                        struct vr_nexthop *,
@@ -156,6 +158,9 @@ struct vr_nexthop {
 
 #define nh_component_cnt        nh_u.nh_composite.cnt
 #define nh_component_nh         nh_u.nh_composite.component
+#define nh_component_ecmp_cnt   nh_u.nh_composite.ecmp_cnt
+#define nh_component_ecmp       nh_u.nh_composite.ecmp_active
+
 
 static inline bool
 vr_nexthop_is_vcp(struct vr_nexthop *nh)