From a2fde20b47a73ec1cea6219f19f41c5894cb007b Mon Sep 17 00:00:00 2001 From: "Anand H. Krishnan" Date: Fri, 3 Apr 2015 16:16:03 +0530 Subject: [PATCH] Return error to agent if an entry already existed in the flow table It is logically possible that agent and datapath are trying to create same flow simultaneously. If it so happens that agent gets the entry that datapath created and tries to update that entry assuming that the entry was created by it, then the hold count will never be compensated by a corresponding acted count, and hence vrouter's perception of the number of active hold entries can go wrong. To fix this, return error to agent if the flow it tried to create already existed. Other fixes: . If agent is changing the flow state to 'hold' from any other state, update the hold count entry. . Export the hold count statistics to 'flow' utility Change-Id: I24087baa5bf853b863f34e1b55882927d9114349 Partial-BUG: #1439069 --- dp-core/vr_flow.c | 113 ++++++++++++++++++++++++++++++++++++++++--- dp-core/vr_sandesh.c | 3 +- include/vr_flow.h | 2 + include/vrouter.h | 1 + linux/vrouter_mod.c | 7 +++ sandesh/vr.sandesh | 6 +++ utils/flow.c | 32 +++++++++++- 7 files changed, 156 insertions(+), 8 deletions(-) diff --git a/dp-core/vr_flow.c b/dp-core/vr_flow.c index f5ebc4cb4..58c4acdd3 100644 --- a/dp-core/vr_flow.c +++ b/dp-core/vr_flow.c @@ -36,6 +36,8 @@ unsigned int vr_flow_entries = VR_DEF_FLOW_ENTRIES; unsigned int vr_oflow_entries = VR_DEF_OFLOW_ENTRIES; +unsigned int vr_flow_hold_limit = 1; + #if defined(__linux__) && defined(__KERNEL__) extern unsigned short vr_flow_major; #endif @@ -831,9 +833,16 @@ vr_flow_entry_set_hold(struct vrouter *router, struct vr_flow_entry *flow_e) struct vr_flow_table_info *infop = router->vr_flow_table_info; cpu = vr_get_cpu(); + if (cpu >= vr_num_cpus) { + vr_printf("vrouter: Set HOLD failed (cpu %u num_cpus %u)\n", + cpu, vr_num_cpus); + return; + } + flow_e->fe_action = VR_FLOW_ACTION_HOLD; if (infop->vfti_hold_count[cpu] + 1 < infop->vfti_hold_count[cpu]) { + (void)__sync_add_and_fetch(&infop->vfti_oflows, 1); act_count = infop->vfti_action_count; if (act_count > infop->vfti_hold_count[cpu]) { (void)__sync_sub_and_fetch(&infop->vfti_action_count, @@ -867,7 +876,9 @@ vr_flow_lookup(struct vrouter *router, unsigned short vrf, (pkt->vp_nh->nh_flags & NH_FLAG_RELAXED_POLICY)) return vr_flow_forward(vrf, pkt, proto, fmd); - if (vr_flow_table_hold_count(router) > VR_MAX_FLOW_TABLE_HOLD_COUNT) { + if ((vr_flow_hold_limit) && + (vr_flow_table_hold_count(router) > + VR_MAX_FLOW_TABLE_HOLD_COUNT)) { vr_pfree(pkt, VP_DROP_FLOW_UNUSABLE); return 0; } @@ -1388,8 +1399,12 @@ vr_add_flow(unsigned int rid, struct vr_flow_key *key, struct vrouter *router = vrouter_get(rid); flow_e = vr_find_flow(router, key, fe_index); - if (!flow_e) + if (flow_e) { + /* a race between agent and dp. allow agent to handle this error */ + return NULL; + } else { flow_e = vr_find_free_entry(router, key, need_hold_queue, fe_index); + } return flow_e; } @@ -1549,6 +1564,7 @@ vr_flow_set(struct vrouter *router, vr_flow_req *req) { int ret; unsigned int fe_index; + struct vr_flow_entry *fe = NULL; struct vr_flow_table_info *infop = router->vr_flow_table_info; @@ -1584,6 +1600,8 @@ vr_flow_set(struct vrouter *router, vr_flow_req *req) fe = vr_add_flow_req(req, &fe_index); if (!fe) return -ENOSPC; + + infop->vfti_added++; } else { if ((req->fr_action == VR_FLOW_ACTION_HOLD) && (fe->fe_action != req->fr_action)) { @@ -1620,15 +1638,66 @@ vr_flow_set(struct vrouter *router, vr_flow_req *req) fe->fe_ecmp_nh_index = req->fr_ecmp_nh_index; fe->fe_src_nh_index = req->fr_src_nh_index; - fe->fe_action = req->fr_action; + + if ((req->fr_action == VR_FLOW_ACTION_HOLD) && + (fe->fe_action != VR_FLOW_ACTION_HOLD)) { + vr_flow_entry_set_hold(router, fe); + } else { + fe->fe_action = req->fr_action; + } + if (fe->fe_action == VR_FLOW_ACTION_DROP) fe->fe_drop_reason = (uint8_t)req->fr_drop_reason; + fe->fe_flags = req->fr_flags; vr_flow_udp_src_port(router, fe); return vr_flow_schedule_transition(router, req, fe); } +static void +vr_flow_req_destroy(vr_flow_req *req) +{ + if (!req) + return; + + if (req->fr_hold_stat && req->fr_hold_stat_size) { + vr_free(req->fr_hold_stat); + req->fr_hold_stat = NULL; + req->fr_hold_stat_size = 0; + } + + vr_free(req); + + return; +} + +vr_flow_req * +vr_flow_req_get(vr_flow_req *ref_req) +{ + unsigned int hold_stat_size = vr_num_cpus * sizeof(uint32_t); + vr_flow_req *req = vr_zalloc(sizeof(*req)); + + if (!req) + return NULL; + + if (ref_req) { + memcpy(req, ref_req, sizeof(*ref_req)); + /* not intended */ + req->fr_pcap_meta_data = NULL; + req->fr_pcap_meta_data_size = 0; + } + + req->fr_hold_stat = vr_zalloc(hold_stat_size); + if (!req->fr_hold_stat) { + vr_free(req); + return NULL; + } + req->fr_hold_stat_size = hold_stat_size; + + return req; +} + /* * sandesh handler for vr_flow_req */ @@ -1636,28 +1705,60 @@ void vr_flow_req_process(void *s_req) { int ret = 0; + unsigned int i; + bool need_destroy = false; + uint64_t hold_count = 0; + struct vrouter *router; vr_flow_req *req = (vr_flow_req *)s_req; + vr_flow_req *resp = NULL; router = vrouter_get(req->fr_rid); switch (req->fr_op) { case FLOW_OP_FLOW_TABLE_GET: - req->fr_ftable_size = vr_flow_table_size(router) + + resp = vr_flow_req_get(req); + if (!resp) { + ret = -ENOMEM; + goto send_response; + } + + need_destroy = true; + + resp->fr_ftable_size = vr_flow_table_size(router) + vr_oflow_table_size(router); #if defined(__linux__) && defined(__KERNEL__) - req->fr_ftable_dev = vr_flow_major; + resp->fr_ftable_dev = vr_flow_major; #endif + resp->fr_processed = router->vr_flow_table_info->vfti_action_count; + resp->fr_hold_oflows = router->vr_flow_table_info->vfti_oflows; + resp->fr_added = router->vr_flow_table_info->vfti_added; + resp->fr_cpus = vr_num_cpus; + /* we only have space for 64 stats block max when encoding */ + for (i = 0; ((i < vr_num_cpus) && (i < 64)); i++) { + resp->fr_hold_stat[i] = + router->vr_flow_table_info->vfti_hold_count[i]; + hold_count += resp->fr_hold_stat[i]; + } + + resp->fr_created = hold_count; + break; case FLOW_OP_FLOW_SET: ret = vr_flow_set(router, req); + resp = req; break; default: ret = -EINVAL; } - vr_message_response(VR_FLOW_OBJECT_ID, req, ret); +send_response: + vr_message_response(VR_FLOW_OBJECT_ID, resp, ret); + if (need_destroy) { + vr_flow_req_destroy(resp); + } + return; } diff --git a/dp-core/vr_sandesh.c b/dp-core/vr_sandesh.c index 7c79ea12d..a088abeb6 100644 --- a/dp-core/vr_sandesh.c +++ b/dp-core/vr_sandesh.c @@ -34,7 +34,8 @@ struct sandesh_object_md sandesh_md[] = { .obj_type_string = "vr_mirror_req", }, [VR_FLOW_OBJECT_ID] = { - .obj_len = 4 * sizeof(vr_flow_req), + .obj_len = ((4 * sizeof(vr_flow_req)) + + (64 * sizeof(unsigned int))), .obj_type_string = "vr_flow_req", }, [VR_VRF_ASSIGN_OBJECT_ID] = { diff --git a/include/vr_flow.h b/include/vr_flow.h index 06021eefb..d901525af 100644 --- a/include/vr_flow.h +++ b/include/vr_flow.h @@ -106,6 +106,8 @@ struct vr_flow_key { */ struct vr_flow_table_info { uint64_t vfti_action_count; + uint64_t vfti_added; + uint32_t vfti_oflows; uint32_t vfti_hold_count[0]; }; diff --git a/include/vrouter.h b/include/vrouter.h index ac8d3e441..e627cbdf2 100644 --- a/include/vrouter.h +++ b/include/vrouter.h @@ -35,6 +35,7 @@ extern int vr_perfq1, vr_perfq2, vr_perfq3; extern int vr_from_vm_mss_adj; extern int vr_to_vm_mss_adj; extern int vr_udp_coff; +extern unsigned int vr_flow_hold_limit; extern int vr_use_linux_br; extern int hashrnd_inited; extern uint32_t vr_hashrnd; diff --git a/linux/vrouter_mod.c b/linux/vrouter_mod.c index cdab4aec7..72b4ab0d6 100644 --- a/linux/vrouter_mod.c +++ b/linux/vrouter_mod.c @@ -2252,6 +2252,13 @@ static ctl_table vrouter_table[] = .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "flow_hold_limit", + .data = &vr_flow_hold_limit, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, {} }; diff --git a/sandesh/vr.sandesh b/sandesh/vr.sandesh index 90fe86cb8..e827a92ba 100644 --- a/sandesh/vr.sandesh +++ b/sandesh/vr.sandesh @@ -149,6 +149,12 @@ buffer sandesh vr_flow_req { 23: i32 fr_src_nh_index; 24: i16 fr_flow_nh_id; 25: i16 fr_drop_reason; + 26: i64 fr_processed; + 27: i64 fr_created; + 28: i64 fr_added; + 29: i32 fr_cpus; + 30: i32 fr_hold_oflows; + 31: list fr_hold_stat; } buffer sandesh vr_vrf_assign_req { diff --git a/utils/flow.c b/utils/flow.c index ec6548ccd..41d114d74 100644 --- a/utils/flow.c +++ b/utils/flow.c @@ -55,8 +55,14 @@ struct flow_table { struct vr_flow_entry *ft_entries; u_int64_t ft_entries_p; u_int64_t ft_span; + u_int64_t ft_processed; + u_int64_t ft_created; + u_int64_t ft_added; unsigned int ft_num_entries; unsigned int ft_flags; + unsigned int ft_cpus; + unsigned int ft_hold_oflows; + u_int32_t ft_hold_stat[64]; } main_table; int mem_fd; @@ -128,7 +134,18 @@ dump_table(struct flow_table *ft) const char *drop_reason = NULL; struct in_addr in_src, in_dest; - printf("Flow table\n\n"); + printf("Flow table(size %lu, entries %u)\n\n", ft->ft_span, + ft->ft_num_entries); + printf("Entries: Created %lu Added %lu Processed %lu\n", + ft->ft_created, ft->ft_added, ft->ft_processed); + printf("(Created Flows/CPU: "); + for (i = 0; i < ft->ft_cpus; i++) { + printf("%u", ft->ft_hold_stat[i]); + if (i != (ft->ft_cpus - 1)) + printf(" "); + } + printf(")(oflows %u)\n", ft->ft_hold_oflows); + printf(" Index Source:Port Destination:Port \tProto(V)\n"); printf("-----------------------------------------------------------------"); printf("--------\n"); @@ -427,6 +444,7 @@ int flow_table_map(vr_flow_req *req) { int ret; + unsigned int i; struct flow_table *ft = &main_table; if (req->fr_ftable_dev < 0) @@ -454,6 +472,18 @@ flow_table_map(vr_flow_req *req) ft->ft_span = req->fr_ftable_size; ft->ft_num_entries = ft->ft_span / sizeof(struct vr_flow_entry); + ft->ft_processed = req->fr_processed; + ft->ft_created = req->fr_created; + ft->ft_hold_oflows = req->fr_hold_oflows; + ft->ft_added = req->fr_added; + ft->ft_cpus = req->fr_cpus; + + if (req->fr_hold_stat && req->fr_hold_stat_size) { + for (i = 0; i < ft->ft_cpus; i++) { + ft->ft_hold_stat[i] = req->fr_hold_stat[i]; + } + } + return ft->ft_num_entries; }