Skip to content

Commit

Permalink
Fix TOR Agent Crash
Browse files Browse the repository at this point in the history
Issue:
------
in a scaled setup when connection flaps in a short
duration, we try to stop the DB table walk in OVSDB
object destructor, but by that time we already would
have released client idl pointer, here it tries to
stop walk using NULL client idl pointer resulting
in this crash

Fix:
----
Stop DB table walk, before removing client idl reference
in EmptyTable.

Added test case to simulate

Closes-Bug: 1453064
Change-Id: I7fd0233acd8d2ce0d5ea094c396850d435149cd1
  • Loading branch information
Prabhjot Singh Sethi committed May 14, 2015
1 parent e3a5a00 commit fce7629
Show file tree
Hide file tree
Showing 7 changed files with 235 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ class OvsdbClientIdl {
bool KeepAliveTimerCb();
void TriggerDeletion();
bool IsDeleted() const { return deleted_; }
int refcount() const { return refcount_; }

private:
friend void ovsdb_wrapper_idl_callback(void *, int, struct ovsdb_idl_row *);
Expand Down
11 changes: 9 additions & 2 deletions src/vnsw/agent/ovs_tor_agent/ovsdb_client/ovsdb_client_tcp.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ const boost::asio::ip::tcp::endpoint &OvsdbClientTcp::server_ep() const {
return server_ep_;
}

void OvsdbClientTcp::set_connect_complete_cb(SessionEventCb cb) {
connect_complete_cb_ = cb;
}

OvsdbClientSession *OvsdbClientTcp::FindSession(Ip4Address ip, uint16_t port) {
// match both ip and port with available session
// if port is not provided match only ip
Expand Down Expand Up @@ -192,6 +196,8 @@ uint16_t OvsdbClientTcpSession::remote_port() const {
}

bool OvsdbClientTcpSession::ProcessSessionEvent(OvsdbSessionEvent ovs_event) {
OvsdbClientTcp *ovs_server =
static_cast<OvsdbClientTcp *>(server());
boost::system::error_code ec;
switch (ovs_event.event) {
case TcpSession::CONNECT_FAILED:
Expand All @@ -208,8 +214,6 @@ bool OvsdbClientTcpSession::ProcessSessionEvent(OvsdbSessionEvent ovs_event) {
// Trigger close for the current session, to allocate
// and start a new one.
OnClose();
OvsdbClientTcp *ovs_server =
static_cast<OvsdbClientTcp *>(server());
if (ovs_server->shutdown_ == false) {
ovs_server->session_ = ovs_server->CreateSession();
ovs_server->Connect(ovs_server->session_,
Expand All @@ -224,6 +228,9 @@ bool OvsdbClientTcpSession::ProcessSessionEvent(OvsdbSessionEvent ovs_event) {
assert(ec.value() == 0);
set_status("Established");
OnEstablish();
if (!ovs_server->connect_complete_cb_.empty()) {
ovs_server->connect_complete_cb_(this);
}
break;
default:
break;
Expand Down
6 changes: 6 additions & 0 deletions src/vnsw/agent/ovs_tor_agent/ovsdb_client/ovsdb_client_tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ class OvsdbClientTcpSession : public OvsdbClientSession, public TcpSession {

class OvsdbClientTcp : public TcpServer, public OvsdbClient {
public:
typedef boost::function<void (OvsdbClientTcpSession *)> SessionEventCb;

OvsdbClientTcp(Agent *agent, IpAddress tor_ip, int tor_port,
IpAddress tsn_ip, int keepalive_interval,
OvsPeerManager *manager);
Expand All @@ -114,6 +116,9 @@ class OvsdbClientTcp : public TcpServer, public OvsdbClient {
Ip4Address tsn_ip();
const boost::asio::ip::tcp::endpoint &server_ep() const;

// Used by Test Code to trigger events in specific order
void set_connect_complete_cb(SessionEventCb cb);

// API to shutdown the TCP server
void shutdown();

Expand All @@ -128,6 +133,7 @@ class OvsdbClientTcp : public TcpServer, public OvsdbClient {
boost::asio::ip::tcp::endpoint server_ep_;
Ip4Address tsn_ip_;
bool shutdown_;
SessionEventCb connect_complete_cb_;
DISALLOW_COPY_AND_ASSIGN(OvsdbClientTcp);
};
};
Expand Down
10 changes: 6 additions & 4 deletions src/vnsw/agent/ovs_tor_agent/ovsdb_client/ovsdb_object.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,7 @@ OvsdbDBObject::OvsdbDBObject(OvsdbClientIdl *idl, DBTable *tbl,
}

OvsdbDBObject::~OvsdbDBObject() {
if (walkid_ != DBTableWalker::kInvalidWalkerId) {
DBTableWalker *walker = client_idl_->agent()->db()->GetWalker();
walker->WalkCancel(walkid_);
}
assert(walkid_ == DBTableWalker::kInvalidWalkerId);
}

void OvsdbDBObject::OvsdbRegisterDBTable(DBTable *tbl) {
Expand Down Expand Up @@ -134,6 +131,11 @@ void OvsdbDBObject::DeleteTable(void) {

void OvsdbDBObject::EmptyTable(void) {
if (delete_scheduled()) {
if (walkid_ != DBTableWalker::kInvalidWalkerId) {
DBTableWalker *walker = client_idl_->agent()->db()->GetWalker();
walker->WalkCancel(walkid_);
walkid_ = DBTableWalker::kInvalidWalkerId;
}
client_idl_ = NULL;
}
}
Expand Down
1 change: 1 addition & 0 deletions src/vnsw/agent/ovs_tor_agent/ovsdb_client/test/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ AgentEnv.MakeTestCmd(env, 'test_ovs_multicast_local', agent_suite)
AgentEnv.MakeTestCmd(env, 'test_ovs_unicast_remote', agent_suite)
AgentEnv.MakeTestCmd(env, 'test_ovs_unicast_local', agent_suite)
AgentEnv.MakeTestCmd(env, 'test_ovs_vlan_port', agent_suite)
AgentEnv.MakeTestCmd(env, 'test_ovs_event', agent_suite)

AgentEnv.MakeTestCmd(env, 'test_agent_route_export', disabled_ovsdb_suite)

Expand Down
210 changes: 210 additions & 0 deletions src/vnsw/agent/ovs_tor_agent/ovsdb_client/test/test_ovs_event.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
/*
* Copyright (c) 2015 Juniper Networks, Inc. All rights reserved.
*/

#include "base/os.h"
#include "testing/gunit.h"

#include <base/logging.h>
#include <io/event_manager.h>
#include <io/test/event_manager_test.h>
#include <tbb/task.h>
#include <base/task.h>

#include <cmn/agent_cmn.h>

#include "cfg/cfg_init.h"
#include "cfg/cfg_interface.h"
#include "pkt/pkt_init.h"
#include "services/services_init.h"
#include "vrouter/ksync/ksync_init.h"
#include "oper/interface_common.h"
#include "oper/nexthop.h"
#include "oper/tunnel_nh.h"
#include "route/route.h"
#include "oper/vrf.h"
#include "oper/mpls.h"
#include "oper/vm.h"
#include "oper/vn.h"
#include "filter/acl.h"
#include "openstack/instance_service_server.h"
#include "test_cmn_util.h"
#include "vr_types.h"

#include "openstack/instance_service_server.h"
#include "xmpp/xmpp_init.h"
#include "xmpp/test/xmpp_test_util.h"
#include "vr_types.h"
#include "control_node_mock.h"
#include "xml/xml_pugi.h"
#include "controller/controller_peer.h"
#include "controller/controller_export.h"
#include "controller/controller_vrf_export.h"

#include "ovs_tor_agent/ovsdb_client/ovsdb_route_peer.h"
#include "ovs_tor_agent/ovsdb_client/physical_switch_ovsdb.h"
#include "ovs_tor_agent/ovsdb_client/logical_switch_ovsdb.h"
#include "ovs_tor_agent/ovsdb_client/physical_port_ovsdb.h"
#include "ovs_tor_agent/ovsdb_client/unicast_mac_local_ovsdb.h"
#include "test_ovs_agent_init.h"
#include "test-xml/test_xml.h"
#include "test-xml/test_xml_oper.h"
#include "test_xml_physical_device.h"
#include "test_xml_ovsdb.h"

#include "test_ovs_agent_util.h"

#include <ovsdb_types.h>

using namespace pugi;
using namespace OVSDB;
using namespace boost::uuids;

EventManager evm1;
ServerThread *thread1;
test::ControlNodeMock *bgp_peer1;

EventManager evm2;
ServerThread *thread2;
test::ControlNodeMock *bgp_peer2;

void RouterIdDepInit(Agent *agent) {
Agent::GetInstance()->controller()->Connect();
}

class OvsdbEventTest : public ::testing::Test {
public:
OvsdbEventTest() {
}

UnicastMacLocalEntry *FindUcastLocal(const string &logical_switch,
const string &mac) {
UnicastMacLocalOvsdb *table =
tcp_session_->client_idl()->unicast_mac_local_ovsdb();
UnicastMacLocalEntry key(table, logical_switch, mac);
UnicastMacLocalEntry *entry =
static_cast<UnicastMacLocalEntry *> (table->Find(&key));
return entry;
}

virtual void SetUp() {
agent_ = Agent::GetInstance();
init_ = static_cast<TestOvsAgentInit *>(client->agent_init());
peer_manager_ = init_->ovs_peer_manager();
WAIT_FOR(100, 10000,
(tcp_session_ = static_cast<OvsdbClientTcpSession *>
(init_->ovsdb_client()->NextSession(NULL))) != NULL);
WAIT_FOR(100, 10000,
(tcp_session_->client_idl() != NULL));
WAIT_FOR(100, 10000, (tcp_session_->status() == string("Established")));

AgentUtXmlTest test("controller/src/vnsw/agent/ovs_tor_agent/ovsdb_"
"client/test/xml/ucast-local-test-setup.xml");
// set current session in test context
OvsdbTestSetSessionContext(tcp_session_);
AgentUtXmlOperInit(&test);
AgentUtXmlPhysicalDeviceInit(&test);
AgentUtXmlOvsdbInit(&test);
if (test.Load() == true) {
test.ReadXml();
string str;
test.ToString(&str);
cout << str << endl;
test.Run();
}
}

virtual void TearDown() {
AgentUtXmlTest test("controller/src/vnsw/agent/ovs_tor_agent/ovsdb_"
"client/test/xml/ucast-local-test-teardown.xml");
// set current session in test context
OvsdbTestSetSessionContext(tcp_session_);
AgentUtXmlOperInit(&test);
AgentUtXmlPhysicalDeviceInit(&test);
AgentUtXmlOvsdbInit(&test);
if (test.Load() == true) {
test.ReadXml();
string str;
test.ToString(&str);
cout << str << endl;
test.Run();
}
}

void NoOpSessionEvent(OvsdbClientTcpSession *session) {
}

void ImmediateCloseSessionEventDone(OvsdbClientTcpSession *session) {
immediate_close_done_ = true;
tcp_session_ = session;
OvsdbClientTcp *ovs_server =
static_cast<OvsdbClientTcp *>(init_->ovsdb_client());
ovs_server->set_connect_complete_cb(
boost::bind(&OvsdbEventTest::NoOpSessionEvent, this, _1));
}

void ImmediateCloseSessionEvent(OvsdbClientTcpSession *session) {
if (session->client_idl()->deleted()) {
return;
}
// take reference for idl to validate
immediate_close_idl_ = session->client_idl();
session->TriggerClose();
OvsdbClientTcp *ovs_server =
static_cast<OvsdbClientTcp *>(init_->ovsdb_client());
ovs_server->set_connect_complete_cb(
boost::bind(&OvsdbEventTest::ImmediateCloseSessionEventDone, this, _1));
tcp_session_ = NULL;
}

Agent *agent_;
TestOvsAgentInit *init_;
OvsPeerManager *peer_manager_;
OvsdbClientTcpSession *tcp_session_;
OvsdbClientIdlPtr immediate_close_idl_;
tbb::atomic<bool> immediate_close_done_;
};

TEST_F(OvsdbEventTest, ImmediateConnectionDown) {
TestTaskHold *hold =
new TestTaskHold(TaskScheduler::GetInstance()->GetTaskId("db::DBTableStop"), 0);

OvsdbClientTcp *ovs_server =
static_cast<OvsdbClientTcp *>(init_->ovsdb_client());
ovs_server->set_connect_complete_cb(
boost::bind(&OvsdbEventTest::ImmediateCloseSessionEvent, this, _1));

immediate_close_done_ = false;
tcp_session_->TriggerClose();
// wait for completion of immediate_close
WAIT_FOR(500, 10000, (immediate_close_done_ == true));

// wait for idl table delete complete
// only ref count remaining for session and test code
WAIT_FOR(300, 10000, (immediate_close_idl_->refcount() == 2));
immediate_close_idl_ = NULL;

WAIT_FOR(100, 10000, tcp_session_ != NULL &&
tcp_session_->status() == string("Established"));
delete hold;
client->WaitForIdle();
}

int main(int argc, char *argv[]) {
GETUSERARGS();
// override with true to initialize ovsdb server and client
ksync_init = true;
client = OvsTestInit(init_file, ksync_init);

// create a task exclusion policy to hold db::DBTable task
TaskScheduler *scheduler = TaskScheduler::GetInstance();
TaskPolicy policy;
int task_id = scheduler->GetTaskId("db::DBTable");
policy.push_back(TaskExclusion(task_id));
scheduler->SetPolicy(scheduler->GetTaskId("db::DBTableStop"), policy);
client->WaitForIdle();

int ret = RUN_ALL_TESTS();
TestShutdown();
return ret;
}
3 changes: 2 additions & 1 deletion src/vnsw/agent/test/test_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ bool TestTaskHold::HoldTask::Run() {
return true;
}

TestTaskHold::TestTaskHold(int task_id, int task_instance) {
TestTaskHold::TestTaskHold(int task_id, int task_instance) :
task_id_(task_id), task_instance_(task_instance) {
task_held_ = false;
HoldTask *task_entry = new HoldTask(this);
TaskScheduler *scheduler = TaskScheduler::GetInstance();
Expand Down

0 comments on commit fce7629

Please sign in to comment.