Skip to content

Commit

Permalink
Flow eviction by datapath based on TCP states
Browse files Browse the repository at this point in the history
Inactive TCP flows (flows that have already seen the closure cycle -
FIN/ACK or the RESET flags) should additionally be considered as a
free flow entry so that vRouter does not have to wait for agent to
accommodate new flows. This logic will provide better service under
severe occupancy. This modification also removes the previous logic
of trapping packets to agent when datapath detects closure of a TCP
stream.

Change-Id: I1009b10f990ea2bf904ac0daec59378d1da07acd
Partial-BUG: #1362701
  • Loading branch information
anandhk-juniper committed Oct 9, 2015
1 parent 7e795e2 commit 507fda3
Show file tree
Hide file tree
Showing 8 changed files with 529 additions and 144 deletions.
542 changes: 421 additions & 121 deletions dp-core/vr_flow.c

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion dp-core/vr_interface.c
Expand Up @@ -377,6 +377,7 @@ agent_trap_may_truncate(int trap_reason)
case AGENT_TRAP_NEXTHOP:
case AGENT_TRAP_RESOLVE:
case AGENT_TRAP_FLOW_MISS:
case AGENT_TRAP_FLOW_ACTION_HOLD:
case AGENT_TRAP_ECMP_RESOLVE:
case AGENT_TRAP_HANDLE_DF:
case AGENT_TRAP_ZERO_TTL:
Expand Down Expand Up @@ -473,16 +474,20 @@ agent_send(struct vr_interface *vif, struct vr_packet *pkt,

switch (params->trap_reason) {
case AGENT_TRAP_FLOW_MISS:
case AGENT_TRAP_FLOW_ACTION_HOLD:
if (params->trap_param) {
fta = (struct vr_flow_trap_arg *)(params->trap_param);
hdr->hdr_cmd_param = htonl(fta->vfta_index);
hdr->hdr_cmd_param_1 = htonl(fta->vfta_nh_index);
hdr->hdr_cmd_param_2 = htonl(fta->vfta_stats.flow_bytes);
hdr->hdr_cmd_param_3 = htonl(fta->vfta_stats.flow_packets);
hdr->hdr_cmd_param_4 = htonl((fta->vfta_stats.flow_bytes_oflow |
(fta->vfta_stats.flow_packets_oflow << 16)));
}
break;

case AGENT_TRAP_ECMP_RESOLVE:
case AGENT_TRAP_SOURCE_MISMATCH:
case AGENT_TRAP_SESSION_CLOSE:
if (params->trap_param)
hdr->hdr_cmd_param = htonl(*(unsigned int *)(params->trap_param));
break;
Expand Down
32 changes: 18 additions & 14 deletions dpdk/vr_dpdk_host.c
Expand Up @@ -41,7 +41,7 @@ static bool vr_host_inited = false;
extern void vr_malloc_stats(unsigned int, unsigned int);
extern void vr_free_stats(unsigned int);
/* RCU callback */
extern void vr_flow_queue_free(struct vrouter *router, void *arg);
extern void vr_flow_defer_cb(struct vrouter *router, void *arg);


static void *
Expand Down Expand Up @@ -517,25 +517,29 @@ dpdk_rcu_cb(struct rcu_head *rh)
struct vr_flow_queue *vfq;
struct vr_packet_node *pnode;


cb_data = CONTAINER_OF(rcd_rcu, struct vr_dpdk_rcu_cb_data, rh);

/* check if we need to pass the callback to packet lcore */
if (cb_data->rcd_user_cb == vr_flow_queue_free
&& cb_data->rcd_user_data) {
if ((cb_data->rcd_user_cb == vr_flow_defer_cb) &&
cb_data->rcd_user_data) {
defer = (struct vr_defer_data *)cb_data->rcd_user_data;
vfq = (struct vr_flow_queue *)defer->vdd_data;
for (i = 0; i < VR_MAX_FLOW_QUEUE_ENTRIES; i++) {
pnode = &vfq->vfq_pnodes[i];
if (pnode->pl_packet) {
RTE_LOG(DEBUG, VROUTER, "%s: lcore %u passing RCU callback to lcore %u\n",
__func__, rte_lcore_id(), VR_DPDK_PACKET_LCORE_ID);
vr_dpdk_lcore_cmd_post(VR_DPDK_PACKET_LCORE_ID,
VR_DPDK_LCORE_RCU_CMD, (uintptr_t)rh);
return;
vfq = ((struct vr_flow_defer_data *)defer->vdd_data)->vfdd_flow_queue;
if (vfq) {
for (i = 0; i < VR_MAX_FLOW_QUEUE_ENTRIES; i++) {
pnode = &vfq->vfq_pnodes[i];
if (pnode->pl_packet) {
RTE_LOG(DEBUG, VROUTER, "%s: lcore %u passing RCU callback "
"to lcore %u\n", __func__, rte_lcore_id(),
VR_DPDK_PACKET_LCORE_ID);
vr_dpdk_lcore_cmd_post(VR_DPDK_PACKET_LCORE_ID,
VR_DPDK_LCORE_RCU_CMD, (uintptr_t)rh);
return;
}
}
RTE_LOG(DEBUG, VROUTER, "%s: lcore %u passing RCU callback to lcore %u\n",
__func__, rte_lcore_id(), VR_DPDK_PACKET_LCORE_ID);
}
RTE_LOG(DEBUG, VROUTER, "%s: lcore %u just calling RCU callback\n",
__func__, rte_lcore_id());
}
/* no need to send any packets, so just call the callback */
cb_data->rcd_user_cb(cb_data->rcd_router, cb_data->rcd_user_data);
Expand Down
5 changes: 4 additions & 1 deletion include/vr_defs.h
Expand Up @@ -34,7 +34,7 @@
#define AGENT_TRAP_ZERO_TTL 12
#define AGENT_TRAP_ICMP_ERROR 13
#define AGENT_TRAP_TOR_CONTROL_PKT 14
#define AGENT_TRAP_SESSION_CLOSE 15
#define AGENT_TRAP_FLOW_ACTION_HOLD 15
#define MAX_AGENT_HDR_COMMANDS 16

enum rt_type{
Expand All @@ -60,6 +60,9 @@ struct agent_hdr {
unsigned short hdr_cmd;
unsigned int hdr_cmd_param;
unsigned int hdr_cmd_param_1;
unsigned int hdr_cmd_param_2;
unsigned int hdr_cmd_param_3;
unsigned int hdr_cmd_param_4;
} __attribute__((packed));

#define CMD_PARAM_PACKET_CTRL 0x1
Expand Down
69 changes: 63 additions & 6 deletions include/vr_flow.h
Expand Up @@ -21,12 +21,18 @@ typedef enum {
FLOW_CONSUMED,
} flow_result_t;

#define VR_FLOW_FLAG_ACTIVE 0x1
#define VR_RFLOW_VALID 0x1000
#define VR_FLOW_FLAG_MIRROR 0x2000
#define VR_FLOW_FLAG_VRFT 0x4000
#define VR_FLOW_FLAG_LINK_LOCAL 0x8000

#define VR_FLOW_FLAG_ACTIVE 0x0001
#define VR_FLOW_FLAG_MODIFIED 0x0100
#define VR_FLOW_FLAG_NEW_FLOW 0x0200
#define VR_FLOW_FLAG_EVICT_CANDIDATE 0x0400
#define VR_FLOW_FLAG_EVICTED 0x0800
#define VR_RFLOW_VALID 0x1000
#define VR_FLOW_FLAG_MIRROR 0x2000
#define VR_FLOW_FLAG_VRFT 0x4000
#define VR_FLOW_FLAG_LINK_LOCAL 0x8000

#define VR_FLOW_FLAG_MASK(flag) ((flag) & ~(VR_FLOW_FLAG_EVICT_CANDIDATE |\
VR_FLOW_FLAG_EVICTED | VR_FLOW_FLAG_NEW_FLOW))
/* rest of the flags are action specific */

/* for NAT */
Expand Down Expand Up @@ -69,6 +75,13 @@ typedef enum {
: AF_INET)
struct vr_forwarding_md;

struct vr_flow_defer_data {
struct vr_flow_queue *vfdd_flow_queue;
struct vr_flow_entry *vfdd_fe;
unsigned int vfdd_fe_index;
bool vfdd_delete;
};

struct vr_common_flow{
unsigned char ip_family;
unsigned char ip_proto;
Expand Down Expand Up @@ -215,6 +228,46 @@ struct vr_flow_queue {
struct vr_packet_node vfq_pnodes[VR_MAX_FLOW_QUEUE_ENTRIES];
};

/*
* Flow eviction:
* 1. Requirement
* --------------
*
* Inactive TCP flows (flows that have already seen the closure cycle - FIN/ACK
* or the RESET flags) should additionally be considered as a free flow entry
* so that vRouter does not have to wait for agent's aging cycle to accommodate
* new flows under severe occupancy and provide better service.
*
* 2. Problems in datapath initiated flow closure
* ----------------------------------------------
*
* . Simultaneous discovery of the same flow entry by two different CPUs
* . Simultaneous closure of an entry by both agent as well as from datapath
* . Handling of packets held in the flow entry when the entry moves from hold to
* closed state
*
* 3. Implementation
* -----------------
*
* 3.1 Marking
* -----------
*
* Once the TCP state machine determines that a flow can be closed, it updates
* the tcp flags with a new flag VR_FLOW_TCP_DEAD, since determining whether a
 * tcp flow has seen its end with only the existing TCP flags is a bit more
 * involved. The last packet, before exiting the module, marks the flow as an
 * eviction candidate (VR_FLOW_FLAG_EVICT_CANDIDATE).
*
* 3.2 Allocation/Eviction
* -----------------------
*
 * Once the last packet exits the flow module, work is scheduled to mark the
 * flow as inactive. This work will schedule an RCU callback to mark the entry
 * as inactive (this is the same path used when agent deletes a flow). While
* deleting the entry, the evicted flow will also be marked as evicted (VR_FLOW_
* FLAG_EVICTED).
*
*/
#define VR_FLOW_TCP_FIN 0x0001
#define VR_FLOW_TCP_HALF_CLOSE 0x0002
#define VR_FLOW_TCP_FIN_R 0x0004
Expand All @@ -223,12 +276,14 @@ struct vr_flow_queue {
#define VR_FLOW_TCP_ESTABLISHED 0x0020
#define VR_FLOW_TCP_ESTABLISHED_R 0x0040
#define VR_FLOW_TCP_RST 0x0080
#define VR_FLOW_TCP_DEAD 0x8000

/* align to 8 byte boundary */
#define VR_FLOW_KEY_PAD ((8 - (sizeof(struct vr_flow) % 8)) % 8)

struct vr_dummy_flow_entry {
struct vr_flow fe_key;
uint8_t fe_key_packing;
uint16_t fe_tcp_flags;
unsigned int fe_tcp_seq;
struct vr_flow_queue *fe_hold_list;
Expand All @@ -252,6 +307,7 @@ struct vr_dummy_flow_entry {
/* do not change. any field positions as it might lead to incompatibility */
struct vr_flow_entry {
struct vr_flow fe_key;
uint8_t fe_key_packing;
uint16_t fe_tcp_flags;
unsigned int fe_tcp_seq;
struct vr_flow_queue *fe_hold_list;
Expand Down Expand Up @@ -305,6 +361,7 @@ struct vr_flow_md {
struct vr_flow_trap_arg {
unsigned int vfta_index;
unsigned int vfta_nh_index;
struct vr_flow_stats vfta_stats;
};

struct vr_packet;
Expand Down
1 change: 1 addition & 0 deletions include/vrouter.h
Expand Up @@ -55,6 +55,7 @@ enum vr_malloc_objects_t {
VR_FLOW_LINK_LOCAL_OBJECT,
VR_FLOW_METADATA_OBJECT,
VR_FLOW_TABLE_INFO_OBJECT,
VR_FLOW_DEFER_DATA_OBJECT,
VR_FRAGMENT_OBJECT,
VR_FRAGMENT_QUEUE_OBJECT,
VR_FRAGMENT_QUEUE_ELEMENT_OBJECT,
Expand Down
1 change: 1 addition & 0 deletions utils/dropstats.c
Expand Up @@ -22,6 +22,7 @@
#include "vr_os.h"
#include "vr_types.h"
#include "vr_nexthop.h"
#include "ini_parser.h"
#include "nl_util.h"
#include "ini_parser.h"

Expand Down
16 changes: 15 additions & 1 deletion utils/flow.c
Expand Up @@ -137,7 +137,8 @@ dump_legend(void)
printf("L=Link Local Port)\n");

printf(" Other:K(nh)=Key_Nexthop, S(nh)=RPF_Nexthop\n");
printf(" TCP(r=reverse):S=SYN, F=FIN, R=RST, C=HalfClose, E=Established\n");
printf(" Flags:E=Evicted, Ec=Evict Candidate, N=New Flow, M=Modified\n");
printf("TCP(r=reverse):S=SYN, F=FIN, R=RST, C=HalfClose, E=Established, D=Dead\n");
printf("\n");

return;
Expand Down Expand Up @@ -284,6 +285,17 @@ dump_table(struct flow_table *ft)
printf("(%u)", fe->fe_drop_reason);
}

printf(", ");
printf("Flags:");
if (fe->fe_flags & VR_FLOW_FLAG_EVICTED)
printf("E");
if (fe->fe_flags & VR_FLOW_FLAG_EVICT_CANDIDATE)
printf("Ec");
if (fe->fe_flags & VR_FLOW_FLAG_NEW_FLOW)
printf("N");
if (fe->fe_flags & VR_FLOW_FLAG_MODIFIED)
printf("M");

printf(", ");
if (fe->fe_key.flow4_proto == VR_IP_PROTO_TCP) {
printf("TCP:");
Expand All @@ -305,6 +317,8 @@ dump_table(struct flow_table *ft)
printf("R");
if (fe->fe_tcp_flags & VR_FLOW_TCP_HALF_CLOSE)
printf("C");
if (fe->fe_tcp_flags & VR_FLOW_TCP_DEAD)
printf("D");

printf(", ");
}
Expand Down

0 comments on commit 507fda3

Please sign in to comment.