Skip to content

Commit

Permalink
Ensure flow-stickiness with bridged forward flow and routed reverse flow
Browse files Browse the repository at this point in the history
Support for following scenario,
 - An ECMP source sends a bridged packet to destination
 - On the destination compute node, l2-flow is created
 - Destination VM responds to flow with l3-flow
 - In VRouter packet processing,
   - Packet hits flow without any ECMP-Index
   - Packet go thru route processing (since dest mac is VRRP-MAC)
   - L3-Processing results in ECMP-NH
   - Packet trapped for ECMP resolution since ECMP-Index is not known

In this scenario, we need to pick ECMP-Index such that packet reaches
the reaches original source of packet from (1) after routing.

Agent changes in ECMP Resolve processing:
 - Original source is found from the tunnel-info field of original
   forward flow
 - ECMP index computation logic is changed to choose the NH with
   tunnel-source got above

Closes-Bug: #1645978
(cherry picked from commit 1433319)

Conflicts:
	src/vnsw/agent/pkt/pkt_flow_info.h

Change-Id: I1f7ac4bc46de49bc68f93a9248d40db325eb51bb
  • Loading branch information
praveenkv authored and haripk committed Jan 5, 2017
1 parent ced8f28 commit 7e9c21b
Show file tree
Hide file tree
Showing 8 changed files with 326 additions and 44 deletions.
25 changes: 12 additions & 13 deletions src/vnsw/agent/oper/nexthop.h
Expand Up @@ -201,16 +201,6 @@ class MemberList {
return mbr_list_.size();
}

uint32_t hash(size_t hash) const {
for (uint32_t i = 0; i < mbr_list_.size(); i++) {
if (hash_table_[hash % hash_table_.size()] != 0xffff) {
return hash_table_[hash % hash_table_.size()];
}
hash++;
}
return 0;
}

uint32_t count() const {
int cnt = 0;
for (uint32_t i = 0; i < mbr_list_.size(); i++) {
Expand Down Expand Up @@ -1431,12 +1421,21 @@ class CompositeNH : public NextHop {
const VrfEntry* vrf() const {
return vrf_.get();
}
uint32_t hash(uint32_t seed) const {
uint32_t PickMember(uint32_t seed, const NextHop *affinity_nh) const {
uint32_t idx = kInvalidComponentNHIdx;
size_t size = component_nh_list_.size();
if (size == 0) {
return kInvalidComponentNHIdx;
return idx;
}
uint32_t idx = seed % size;

if (affinity_nh != NULL) {
ComponentNH comp_nh(0, affinity_nh);
if (GetIndex(comp_nh, idx)) {
return idx;
}
}

idx = seed % size;
while (component_nh_list_[idx].get() == NULL ||
component_nh_list_[idx]->nh() == NULL ||
component_nh_list_[idx]->nh()->IsActive() == false) {
Expand Down
2 changes: 2 additions & 0 deletions src/vnsw/agent/pkt/flow_entry.h
Expand Up @@ -656,6 +656,8 @@ class FlowEntry {
void set_flow_mgmt_info(FlowEntryInfo *info) {
flow_mgmt_info_.reset(info);
}
bool IsReverseFlow() const { return is_flags_set(FlowEntry::ReverseFlow); }
bool IsForwardFlow() const { return !is_flags_set(FlowEntry::ReverseFlow); }
private:
friend class FlowTable;
friend class FlowEntryFreeList;
Expand Down
119 changes: 102 additions & 17 deletions src/vnsw/agent/pkt/pkt_flow_info.cc
Expand Up @@ -216,8 +216,9 @@ static bool NhDecode(const NextHop *nh, const PktInfo *pkt, PktFlowInfo *info,
if (info->out_component_nh_idx ==
CompositeNH::kInvalidComponentNHIdx ||
(comp_nh->GetNH(info->out_component_nh_idx) == NULL)) {
info->out_component_nh_idx = comp_nh->hash(pkt->
hash(ecmp_load_balance));
info->out_component_nh_idx = comp_nh->PickMember
(pkt->hash(ecmp_load_balance),
info->ecmp_component_affinity_nh);
}
nh = comp_nh->GetNH(info->out_component_nh_idx);
// TODO: Should we re-hash here?
Expand Down Expand Up @@ -1357,9 +1358,9 @@ void PktFlowInfo::EgressProcess(const PktInfo *pkt, PktControlInfo *in,
if (out->rt_) {
if (ecmp && out->rt_->GetActivePath()) {
const CompositeNH *comp_nh = static_cast<const CompositeNH *>(nh);
out_component_nh_idx = comp_nh->hash(pkt->
hash(out->rt_->GetActivePath()->
ecmp_load_balance()));
out_component_nh_idx = comp_nh->PickMember
(pkt->hash(out->rt_->GetActivePath()->ecmp_load_balance()),
ecmp_component_affinity_nh);
}
if (out->rt_->GetActiveNextHop()->GetType() == NextHop::ARP ||
out->rt_->GetActiveNextHop()->GetType() == NextHop::RESOLVE) {
Expand Down Expand Up @@ -1672,6 +1673,32 @@ void PktFlowInfo::UpdateEvictedFlowStats(const PktInfo *pkt) {
}
}

// We want to retain forward and reverse flow semantics when flows are
// being processed due to revaluation or ECMP resolution
//
// If the flow corresponding to the request is already present as reverse flow,
// swap the flows being processed
static bool ShouldSwapFlows(const PktFlowInfo *info, const PktInfo *pkt,
const FlowEntry *flow) {
if (flow == NULL)
return false;

if (info->short_flow) {
return false;
}

// If this is message processing, then retain forward and reverse flows
if (pkt->type == PktType::MESSAGE) {
return flow->IsReverseFlow();
}

if (pkt->agent_hdr.cmd == AgentHdr::TRAP_ECMP_RESOLVE) {
return flow->IsReverseFlow();
}

return false;
}

void PktFlowInfo::Add(const PktInfo *pkt, PktControlInfo *in,
PktControlInfo *out) {
bool update = false;
Expand Down Expand Up @@ -1730,9 +1757,9 @@ void PktFlowInfo::Add(const PktInfo *pkt, PktControlInfo *in,
}

bool swap_flows = false;
// If this is message processing, then retain forward and reverse flows
if (pkt->type == PktType::MESSAGE && !short_flow &&
flow_entry->is_flags_set(FlowEntry::ReverseFlow)) {
// If this is message processing or ECMP resolution, then retain forward
// and reverse flows
if (ShouldSwapFlows(this, pkt, flow_entry)) {
// for cases where we need to swap flows rflow should always
// be Non-NULL
assert(rflow != NULL);
Expand Down Expand Up @@ -1826,6 +1853,62 @@ void PktFlowInfo::UpdateFipStatsInfo
}
}

// Flow changing from Non-ECMP to ECMP.
// If flow-trapped is forward flow,
// - If this was originally a L2-Flow
// The source transitioned from L2 to L3 Flow. This is unexpected.
// Set ECMP-Index to 0
// - If this was originally a L3-Flow
// Route transitioned from Non-ECMP to ECMP. The NH used in Non-ECMP
// state is added as first member of ECMP-NH.
// Set the ECMP-Index to 0
// If flow-trapped is reverse flow,
// - If this was originally a L2-Flow
// We must send packet to originator of the old flow. The forward flow
// will contain originator info.
// - If old flow received from fabric, tunnel-info in forward flow
// will have source-ip of originator
// - If old flow received from VM, NH in flow-key is the originator VM
// - If this was originally a L3-Flow
// Route transitioned from Non-ECMP to ECMP. The NH used in Non-ECMP
// state is added as first member of ECMP-NH.
// Set the ECMP-Index to 0
void PktFlowInfo::GetEcmpCompositeAffinityNh() {
// Pick the first member in ECMP by default
out_component_nh_idx = 0;
if (flow_entry->IsForwardFlow())
return;

// Get reverse-flow. We will try to setup ECMP member such that packet is
// forwarded to origin of reverse flow
FlowEntry *fwd_flow = flow_entry->reverse_flow_entry();
if (fwd_flow == NULL)
return;

// Affinity-nh is needed only trapped flow is l2-flow
if (flow_entry->l3_flow())
return;

NextHopTable *nh_table = flow_table->agent()->nexthop_table();
if (fwd_flow->is_flags_set(FlowEntry::IngressDir)) {
// Original packet from VM. Get affnity-nh from flow-key
ecmp_component_affinity_nh =
dynamic_cast<NextHop *>(nh_table->FindNextHop(fwd_flow->key().nh));
} else {
// Original packet from fabric. Get affinity-nh from tunnel-info
Ip4Address tunnel_dest(fwd_flow->data().tunnel_info.ip_saddr);
TunnelNHKey key(flow_table->agent()->fabric_vrf_name(),
flow_table->agent()->router_id(), tunnel_dest,
false, fwd_flow->data().tunnel_info.type);
ecmp_component_affinity_nh =
dynamic_cast<NextHop *>(nh_table->Find(&key, false));
}

if (ecmp_component_affinity_nh != NULL) {
out_component_nh_idx = CompositeNH::kInvalidComponentNHIdx;
}
}

//If a packet is trapped for ecmp resolve, dp might have already
//overwritten original packet(NAT case), hence get actual packet by
//overwritting packet with data in flow entry.
Expand All @@ -1845,8 +1928,8 @@ void PktFlowInfo::RewritePktInfo(uint32_t flow_index) {
return;
}

FlowEntry *flow = flow_table->Find(key);
if (!flow) {
flow_entry = flow_table->Find(key);
if (!flow_entry) {
std::ostringstream ostr;
ostr << "ECMP Resolve: unable to find flow index " << flow_index;
PKTFLOW_TRACE(Err,ostr.str());
Expand All @@ -1858,16 +1941,18 @@ void PktFlowInfo::RewritePktInfo(uint32_t flow_index) {
pkt->ip_proto = key.protocol;
pkt->sport = key.src_port;
pkt->dport = key.dst_port;
pkt->agent_hdr.vrf = flow->data().vrf;
pkt->vrf = flow->data().vrf;
pkt->agent_hdr.vrf = flow_entry->data().vrf;
pkt->vrf = flow_entry->data().vrf;
pkt->agent_hdr.nh = key.nh;
// If component_nh_idx is not set, assume that NH transitioned from
// Non ECMP to ECMP. The old Non-ECMP NH would always be placed at index 0
// in this case. So, set ECMP index 0 in this case
if (flow->data().component_nh_idx == CompositeNH::kInvalidComponentNHIdx) {
out_component_nh_idx = 0;
// Non ECMP to ECMP. Set the ecmp_component_affinity_nh so that ECMP member
// computation is set to it
ecmp_component_affinity_nh = NULL;
if (flow_entry->data().component_nh_idx ==
CompositeNH::kInvalidComponentNHIdx) {
GetEcmpCompositeAffinityNh();
} else {
out_component_nh_idx = flow->data().component_nh_idx;
out_component_nh_idx = flow_entry->data().component_nh_idx;
}
return;
}
Expand Down
14 changes: 11 additions & 3 deletions src/vnsw/agent/pkt/pkt_flow_info.h
Expand Up @@ -20,7 +20,7 @@ typedef map<int, int> FlowRouteRefMap;
struct PktControlInfo {
PktControlInfo() :
vrf_(NULL), intf_(NULL), rt_(NULL), vn_(NULL), vm_(NULL),
vlan_nh_(false), vlan_tag_(0) { }
vlan_nh_(false), vlan_tag_(0), nh_(0) { }
virtual ~PktControlInfo() { }

const VrfEntry *vrf_;
Expand Down Expand Up @@ -54,7 +54,7 @@ class PktFlowInfo {
trap_rev_flow(false), fip_snat(false), fip_dnat(false), snat_fip(),
short_flow_reason(0), peer_vrouter(), tunnel_type(TunnelType::INVALID),
flood_unknown_unicast(false), bgp_router_service_flow(false),
alias_ip_flow(false), ttl(0) {
alias_ip_flow(false), ttl(0), ecmp_component_affinity_nh(NULL) {
}

static bool ComputeDirection(const Interface *intf);
Expand Down Expand Up @@ -84,6 +84,7 @@ class PktFlowInfo {
const VnEntry *vn,
MatchPolicy *m_policy);
void RewritePktInfo(uint32_t index);
void GetEcmpCompositeAffinityNh();
bool VrfTranslate(const PktInfo *pkt, PktControlInfo *ctrl,
PktControlInfo *rev_flow, const IpAddress &src_ip,
bool nat_flow);
Expand Down Expand Up @@ -181,7 +182,7 @@ class PktFlowInfo {
std::string peer_vrouter;
TunnelType tunnel_type;

// flow entry obtained from flow IPC, which requires recomputation.
// flow entry being revaluated or trapped for ECMP resolution
FlowEntry *flow_entry;
bool flood_unknown_unicast;

Expand All @@ -192,6 +193,13 @@ class PktFlowInfo {
bool alias_ip_flow;
//TTL of nat'd flow especially bgp-service flows
uint8_t ttl;

// Affinity to Ecmp component NH.
// When a packet is trapped for ECMP Resolution its possible that flow is
// transition from Non-ECMP to ECMP. In that case, the ECMP index must be
// be set to unicast-nh used when flow is non-ECMP. The affinity-nh
// is set in this case.
const NextHop *ecmp_component_affinity_nh;
};

#endif // __agent_pkt_flow_info_h_

0 comments on commit 7e9c21b

Please sign in to comment.