From 116729fc2788609d247676dcccbd9ad8e7c63efd Mon Sep 17 00:00:00 2001 From: Manish Date: Thu, 13 Aug 2015 09:55:15 +0530 Subject: [PATCH] Agent crashes in peer handling. Problem and fixes: There were three different scenarios where agent was crashing. 1) Bgp-peer State on Vrf: Channel has gone down and peer has been decommisioned. This results in walk start. Before the walk one of the VRF is deleted. Walk reaches this deleted VRF and ignores it as it is delete marked assuming that notification will take care of removal of state. However before notification happens, walk is done and peer released. This results in failure as state is not yet removed. Fix: Dont ignore deleted VRF in walk for deleting peer. 2) Deleted route state addition: Again the same case of deleted peer i.e. channel has gone down resulting in walk to delete state for this peer. Every VRF walk is accompained with route table walk. There is a state maintained on route whose listener(on route table) is stored in state on VRF for that table. This listener on route table is deleted when route walk done is called. Walk itself would have deleted all states on route in this table. Point to note that walk done is enqueued for processing. So it may happen that there is some change in one of the route along with channel coming up with some other peer. RouteExport::Notify will get a call for that route and it will try to see if channel is active. Since new peer came up channel is seen as active. But this peer still has not added it vrf state. Hence Vrf state will be null. Issue happens here. Instead of returning after seeing no vrf state, processing continues and adds state back on route with old id (from old peer). Fix: Return when no vrf state is found. 3) Multicast route was adding state even though its delete marked. This was happening because in MulticastExport::Notify non-null state was and'd with route delete check for delete processing. So if state was null, route delete notification used to skip delete handling and instead land in addition of state. Fix: Decouple state check. Ignore delete notification and return when state is null. Change-Id: Ib5156a2ee6b72902a6ff75f05740549f19874241 Closes-bug: 1480375 --- .../agent/controller/controller_export.cc | 39 ++- .../controller/controller_route_walker.cc | 11 +- src/vnsw/agent/test/test_l2route.cc | 230 ++++++++++++++++-- 3 files changed, 251 insertions(+), 29 deletions(-) diff --git a/src/vnsw/agent/controller/controller_export.cc b/src/vnsw/agent/controller/controller_export.cc index 59783c21d68..8b03c3f64aa 100644 --- a/src/vnsw/agent/controller/controller_export.cc +++ b/src/vnsw/agent/controller/controller_export.cc @@ -105,12 +105,21 @@ void RouteExport::Notify(const Agent *agent, VrfExport::State *vs = static_cast(vrf->GetState(vrf->get_table(), vrf_id)); - if (vs) { - DBTableBase::ListenerId id = vs->rt_export_[route->GetTableType()]-> - GetListenerId(); - if (id != id_) - return; - } + // If VRF state is not present then listener has not been added. + // Addition of listener later will result in walk to notify all routes. + // That in turn will add state as well by calling current routine. + // Therefore return when empty VRF state is found. + if (!vs) + return; + + // There may be instances when decommisioned peer is not yet + // unregistered while a new peer is already present. So there will be + // two notifications. If its for decommisioned peer then ignore the same + // by checking the listener id with active bgp peer listener id. + DBTableBase::ListenerId id = vs->rt_export_[route->GetTableType()]-> + GetListenerId(); + if (id != id_) + return; } if (route->is_multicast()) { @@ -276,7 +285,11 @@ void RouteExport::MulticastNotify(AgentXmppChannel *bgp_xmpp_peer, //Handle withdraw for following cases: //- Route is not having any active multicast exportable path or is deleted. //- associate(false): Bgp Peer has gone down and state needs to be removed. - if ((route_can_be_dissociated || !associate) && (state != NULL)) { + if (route_can_be_dissociated || !associate) { + if (state == NULL) { + return; + } + if (state->fabric_multicast_exported_ == true) { AgentXmppChannel::ControllerSendMcastRouteDelete(bgp_xmpp_peer, route); @@ -286,12 +299,12 @@ void RouteExport::MulticastNotify(AgentXmppChannel *bgp_xmpp_peer, if ((state->ingress_replication_exported_ == true)) { state->tunnel_type_ = TunnelType::INVALID; AgentXmppChannel::ControllerSendEvpnRouteDelete(bgp_xmpp_peer, - route, - state->vn_, - state->label_, - state->destination_, - state->source_, - TunnelType::AllType()); + route, + state->vn_, + state->label_, + state->destination_, + state->source_, + TunnelType::AllType()); state->ingress_replication_exported_ = false; } diff --git a/src/vnsw/agent/controller/controller_route_walker.cc b/src/vnsw/agent/controller/controller_route_walker.cc index a13923501ab..64b43d3aab1 100644 --- a/src/vnsw/agent/controller/controller_route_walker.cc +++ b/src/vnsw/agent/controller/controller_route_walker.cc @@ -33,8 +33,15 @@ bool ControllerRouteWalker::VrfWalkNotify(DBTablePartBase *partition, DBEntryBase *entry) { VrfEntry *vrf = static_cast(entry); // Notification from deleted VRF should have taken care of all operations - // w.r.t. peer, see VrfExport::Notify - if (vrf->IsDeleted()) + // w.r.t. peer, see VrfExport::Notify + // Exception is DelPeer walk. Reason being that this walk will start when + // peer goes down in agent xmpp channel. When it happens bgp peer in channel + // is reset. Any add/delete notification on said VRF checks if xmpp channel + // is active which internally checks for bgp peer. In current case it will + // be NULL and will in-turn ignore notification for delete. State will + // not be deleted for that peer in vrf. To delete state, delpeer walk will + // have to traverse deleted VRF as well. + if (vrf->IsDeleted() && (type_ != DELPEER)) return true; switch (type_) { diff --git a/src/vnsw/agent/test/test_l2route.cc b/src/vnsw/agent/test/test_l2route.cc index 7f1e7df4e6f..f0266d9ff3a 100644 --- a/src/vnsw/agent/test/test_l2route.cc +++ b/src/vnsw/agent/test/test_l2route.cc @@ -116,18 +116,6 @@ class RouteTest : public ::testing::Test { } virtual void TearDown() { - TestRouteTable table1(1); - WAIT_FOR(100, 100, (table1.Size() == 0)); - EXPECT_EQ(table1.Size(), 0U); - - TestRouteTable table2(2); - WAIT_FOR(100, 100, (table2.Size() == 0)); - EXPECT_EQ(table2.Size(), 0U); - - TestRouteTable table3(3); - WAIT_FOR(100, 100, (table3.Size() == 0)); - EXPECT_EQ(table3.Size(), 0U); - VrfDelReq(vrf_name_.c_str()); client->WaitForIdle(); WAIT_FOR(100, 100, (VrfFind(vrf_name_.c_str()) != true)); @@ -799,7 +787,7 @@ TEST_F(RouteTest, all_evpn_routes_deleted_when_local_vms_are_gone) { rt = L2RouteGet("vrf1", MacAddress::FromString("ff:ff:ff:ff:ff:ff"), Ip4Address(0)); - EXPECT_TRUE(rt == NULL); + EXPECT_TRUE(rt->FindPath(bgp_peer_ptr) == NULL); DeleteVmportEnv(input, 1, true); client->WaitForIdle(); @@ -872,7 +860,7 @@ TEST_F(RouteTest, evpn_mcast_label_deleted) { //Label should have been released EXPECT_TRUE(agent_->mpls_table()->FindActiveEntry(new MplsLabelKey(evpn_mpls_label)) == NULL); //route should get deleted - EXPECT_TRUE(rt == NULL); + EXPECT_TRUE(rt->FindPath(bgp_peer_ptr) == NULL); CreateVmportEnv(input, 1); client->WaitForIdle(); @@ -891,6 +879,220 @@ TEST_F(RouteTest, evpn_mcast_label_deleted) { bgp_peer.reset(); } +//Let VRF be unsubscribed from Bgp Peer and then add +//route in same. This route should not be sent to BGP +//peer. +TEST_F(RouteTest, DISABLED_send_route_add_in_not_subscribed_vrf) { + struct PortInfo input[] = { + {"vnet1", 1, "1.1.1.10", "00:00:01:01:01:10", 1, 1}, + }; + + client->Reset(); + //Add a peer and enqueue path add in multicast route. + BgpPeer *bgp_peer_ptr = CreateBgpPeer(Ip4Address(1), "BGP Peer1"); + boost::shared_ptr bgp_peer = + bgp_peer_ptr->GetBgpXmppPeer()->bgp_peer_id_ref(); + CreateVmportEnv(input, 1); + client->WaitForIdle(); + + WAIT_FOR(1000, 1000, + (L2RouteGet(vrf_name_, local_vm_mac_, local_vm_ip4_) != NULL)); + EvpnRouteEntry *rt = EvpnRouteGet("vrf1", local_vm_mac_, local_vm_ip4_, 0); + RouteExport::State *route_state = static_cast + (bgp_peer_ptr->GetRouteExportState(rt->get_table_partition(), + rt)); + EXPECT_TRUE(route_state->exported_ == true); + + VrfEntry *vrf = VrfGet(vrf_name_.c_str()); + RouteExport *rt_export_tmp[Agent::ROUTE_TABLE_MAX]; + VrfExport::State *vrf_state = static_cast + (bgp_peer_ptr->GetVrfExportState(vrf->get_table_partition(), vrf)); + for (uint8_t table_type = (Agent::INVALID + 1); + table_type < Agent::ROUTE_TABLE_MAX; table_type++) { + rt_export_tmp[table_type] = vrf_state->rt_export_[table_type]; + } + + //Stop BGP peer + vrf->ClearState(vrf->get_table_partition()->parent(), + bgp_peer_ptr->GetVrfExportListenerId()); + //Mark Old route as not exported. + route_state->exported_ = false; + rt = EvpnRouteGet("vrf1", local_vm_mac_, local_vm_ip4_, 0); + rt->get_table_partition()->Notify(rt); + client->WaitForIdle(); + EXPECT_TRUE(route_state->exported_ == false); + route_state->exported_ = true; + + for (uint8_t table_type = (Agent::INVALID + 1); + table_type < Agent::ROUTE_TABLE_MAX; table_type++) { + if (rt_export_tmp[table_type]) + (rt_export_tmp[table_type])->Unregister(); + } + //Delete route + //delete vrf_state; + DeleteRoute(agent_->local_vm_peer(), vrf_name_, remote_vm_mac_, + remote_vm_ip4_); + client->WaitForIdle(); + + DeleteVmportEnv(input, 1, true); + client->WaitForIdle(); + DeleteBgpPeer(bgp_peer.get()); + client->WaitForIdle(); + bgp_peer.reset(); +} + +TEST_F(RouteTest, notify_on_vrf_with_deleted_state_for_peer) { + struct PortInfo input[] = { + {"vnet1", 1, "1.1.1.10", "00:00:01:01:01:10", 1, 1}, + }; + + client->Reset(); + //Add a peer and enqueue path add in multicast route. + BgpPeer *bgp_peer_ptr = CreateBgpPeer(Ip4Address(1), "BGP Peer1"); + boost::shared_ptr bgp_peer = + bgp_peer_ptr->GetBgpXmppPeer()->bgp_peer_id_ref(); + CreateVmportEnv(input, 1); + client->WaitForIdle(); + + WAIT_FOR(1000, 1000, + (L2RouteGet(vrf_name_, local_vm_mac_, local_vm_ip4_) != NULL)); + EvpnRouteEntry *rt = EvpnRouteGet("vrf1", local_vm_mac_, local_vm_ip4_, 0); + RouteExport::State *route_state = static_cast + (bgp_peer_ptr->GetRouteExportState(rt->get_table_partition(), + rt)); + EXPECT_TRUE(route_state->exported_ == true); + + VrfEntry *vrf = VrfGet(vrf_name_.c_str()); + RouteExport *rt_export_tmp[Agent::ROUTE_TABLE_MAX]; + VrfExport::State *vrf_state = static_cast + (bgp_peer_ptr->GetVrfExportState(vrf->get_table_partition(), vrf)); + for (uint8_t table_type = (Agent::INVALID + 1); + table_type < Agent::ROUTE_TABLE_MAX; table_type++) { + rt_export_tmp[table_type] = vrf_state->rt_export_[table_type]; + rt->ClearState(rt->get_table(), rt_export_tmp[table_type]->GetListenerId()); + } + + //Stop BGP peer + bgp_peer_ptr->GetBgpXmppPeer()->DeCommissionBgpPeer(); + //create another bgp peer + bgp_peer_ptr->GetBgpXmppPeer()->CreateBgpPeer(); + //Mark Old route as not exported. + rt = EvpnRouteGet("vrf1", local_vm_mac_, local_vm_ip4_, 0); + rt->get_table_partition()->Notify(rt); + client->WaitForIdle(); + route_state = static_cast + (bgp_peer_ptr->GetRouteExportState(rt->get_table_partition(), + rt)); + EXPECT_TRUE(route_state == NULL); + AgentXmppChannel::HandleAgentXmppClientChannelEvent(bgp_peer.get()->GetBgpXmppPeer(), + xmps::NOT_READY); + client->WaitForIdle(); + bgp_peer_ptr->DelPeerRoutes( + boost::bind(&VNController::ControllerPeerHeadlessAgentDelDoneEnqueue, + agent_->controller(), bgp_peer_ptr)); + client->WaitForIdle(); + + //Delete route + DeleteRoute(agent_->local_vm_peer(), vrf_name_, remote_vm_mac_, + remote_vm_ip4_); + client->WaitForIdle(); + + DeleteVmportEnv(input, 1, true); + client->WaitForIdle(); + DeleteBgpPeer(bgp_peer.get()); + client->WaitForIdle(); + bgp_peer.reset(); +} + +TEST_F(RouteTest, delete_notify_on_multicast_rt_with_no_state) { + struct PortInfo input[] = { + {"vnet1", 1, "1.1.1.10", "00:00:01:01:01:10", 1, 1}, + }; + + client->Reset(); + //Add a peer and enqueue path add in multicast route. + BgpPeer *bgp_peer_ptr = CreateBgpPeer(Ip4Address(1), "BGP Peer1"); + boost::shared_ptr bgp_peer = + bgp_peer_ptr->GetBgpXmppPeer()->bgp_peer_id_ref(); + CreateVmportEnv(input, 1); + client->WaitForIdle(); + + WAIT_FOR(1000, 1000, + (L2RouteGet(vrf_name_, local_vm_mac_, local_vm_ip4_) != NULL)); + //Delete route local_vm_peer + DeleteVmportEnv(input, 1, false); + client->WaitForIdle(); + //Delete state + BridgeRouteEntry *rt = L2RouteGet("vrf1", + MacAddress::FromString("ff:ff:ff:ff:ff:ff"), + Ip4Address(0)); + RouteExport::State *route_state = static_cast + (bgp_peer_ptr->GetRouteExportState(rt->get_table_partition(), + rt)); + EXPECT_TRUE(route_state != NULL); + VrfExport::State *vs = static_cast + (bgp_peer_ptr->GetVrfExportState(agent_->vrf_table()->GetTablePartition(rt->vrf()), + rt->vrf())); + rt->ClearState(rt->get_table(), + vs->rt_export_[Agent::BRIDGE]->GetListenerId()); + delete route_state; + //Now delete local peer from route + BridgeAgentRouteTable::DeleteBroadcastReq(agent_->local_peer(), + "vrf1", 1, + Composite::L2COMP); + client->WaitForIdle(); + DeleteBgpPeer(bgp_peer.get()); + client->WaitForIdle(); + bgp_peer.reset(); +} + +// Reset agentxmppchannel in active xmpp channel of agent. +// This results in ignoring any notification as channel is not active. +// Now delete VRF, but take a reference to hold it. +// Start a delpeer walk for Bgp peer of channel. +// Verify if state is deleted in VRF. +// Objective: +// Analogous to channel coming down however VRF delete notification coming later +// and delpeer walk running after that. +TEST_F(RouteTest, delpeer_walk_on_deleted_vrf) { + struct PortInfo input[] = { + {"vnet1", 1, "1.1.1.10", "00:00:00:01:01:01", 1, 1}, + }; + + client->Reset(); + CreateVmportEnv(input, 1); + client->WaitForIdle(); + + //Add a peer and enqueue path add in multicast route. + BgpPeer *bgp_peer_ptr = CreateBgpPeer(Ip4Address(1), "BGP Peer1"); + boost::shared_ptr bgp_peer = + bgp_peer_ptr->GetBgpXmppPeer()->bgp_peer_id_ref(); + + //Reset agent xmpp channel and back up the original channel. + AgentXmppChannel *channel1 = agent_->controller_xmpp_channel(1); + agent_->set_controller_xmpp_channel(NULL, 1); + //Take VRF reference and delete VRF. + VrfEntryRef vrf_ref = VrfGet("vrf1"); + DelVrf("vrf1"); + client->WaitForIdle(); + //Restore agent xmpp channel + agent_->set_controller_xmpp_channel(channel1, 1); + + //Now bring channel down + AgentXmppChannel::HandleAgentXmppClientChannelEvent(bgp_peer.get()->GetBgpXmppPeer(), + xmps::NOT_READY); + client->WaitForIdle(); + //Release VRF reference + vrf_ref.reset(); + client->WaitForIdle(); + + DeleteVmportEnv(input, 1, true); + client->WaitForIdle(); + DeleteBgpPeer(bgp_peer.get()); + client->WaitForIdle(); + bgp_peer.reset(); +} + int main(int argc, char *argv[]) { ::testing::InitGoogleTest(&argc, argv); GETUSERARGS();