From 8b97ed23a09e5af92210119a5f954b4a6923b35e Mon Sep 17 00:00:00 2001 From: Prabhjot Singh Sethi Date: Thu, 19 May 2016 14:23:00 +0530 Subject: [PATCH] Fix ToR agent crash in KSyncObjectManager::Process Issue: ------ ToR agent relies on commiting bulk transactions to OVSDB server, which on completion can end up as a no-op to OVSDB server, which internally triggers ack to complete KSync states on the associated entries in current bulk context. while delete table processing being happening it can so happen that for the next entry pointer obtained there is already a delete op being triggered and added to Bulk context, while completion of bulk context it can end up completing the KSync state for the next entry pointer and end up deleting it while KSyncObjectManager::Process still assuming to have a valid pointer without holding reference to it. Fix: ---- KSyncObjectManager::Process should take reference to the KSync Entry pointers it is dealing with to assure sanity. Replaced usage of unsafe NotifyEvent with SafeNotifyEvent for OVSDB based triggers. Closes-Bug: 1572287 Change-Id: I10fd79d176f0cfc22315a20c47f4db40fe956800 --- src/ksync/ksync_object.cc | 23 +++++++++++++------ src/ksync/ksync_object.h | 2 +- .../ovsdb_client/ha_stale_dev_vn.cc | 4 ++-- .../ovsdb_client/logical_switch_ovsdb.cc | 2 +- .../ovsdb_client/multicast_mac_local_ovsdb.cc | 2 +- .../ovs_tor_agent/ovsdb_client/ovsdb_entry.cc | 10 ++++---- .../ovsdb_client/ovsdb_object.cc | 2 +- .../ovsdb_client/unicast_mac_remote_ovsdb.cc | 2 +- .../ovs_tor_agent/ovsdb_client/vrf_ovsdb.cc | 2 +- 9 files changed, 29 insertions(+), 20 deletions(-) diff --git a/src/ksync/ksync_object.cc b/src/ksync/ksync_object.cc index ec23b1e0c5a..f86cafd204a 100644 --- a/src/ksync/ksync_object.cc +++ b/src/ksync/ksync_object.cc @@ -109,6 +109,7 @@ KSyncEntry *KSyncObject::Find(const KSyncEntry *key) { } KSyncEntry *KSyncObject::Next(const KSyncEntry *entry) const { + tbb::recursive_mutex::scoped_lock lock(lock_); Tree::const_iterator it; if (entry == NULL) { it = tree_.begin(); @@ -1522,7 +1523,8 @@ bool KSyncObjectManager::Process(KSyncObjectEvent *event) { case KSyncObjectEvent::DELETE: { int count = 0; - KSyncEntry *entry; + // hold reference to entry to ensure the pointer sanity + KSyncEntry::KSyncEntryPtr entry(NULL); if (event->ref_.get() == NULL) { event->obj_->set_delete_scheduled(); if (event->obj_->IsEmpty()) { @@ -1537,23 +1539,30 @@ bool KSyncObjectManager::Process(KSyncObjectEvent *event) { entry = event->ref_.get(); } - while (entry != NULL) { - KSyncEntry *next_entry = event->obj_->Next(entry); + // hold reference to entry to ensure the pointer sanity + // next entry can get free'd in certain cases while processing + // current entry + KSyncEntry::KSyncEntryPtr next_entry(NULL); + while (entry.get() != NULL) { + next_entry = event->obj_->Next(entry.get()); count++; if (entry->IsDeleted() == false) { // trigger delete if entry is not marked delete already. - event->obj_->Delete(entry); + event->obj_->Delete(entry.get()); } - if (count == kMaxEntriesProcess && next_entry != NULL) { + if (count == kMaxEntriesProcess && next_entry.get() != NULL) { // update reference with which entry to start with // in next iteration. - event->ref_ = next_entry; + event->ref_ = next_entry.get(); // yeild and re-enqueue event for processing later. event_queue_->Enqueue(event); + + // release reference to deleted entry. + entry = NULL; return false; } - entry = next_entry; + entry = next_entry.get(); } break; } diff --git a/src/ksync/ksync_object.h b/src/ksync/ksync_object.h index e643f37f005..91e58a08f26 100644 --- a/src/ksync/ksync_object.h +++ b/src/ksync/ksync_object.h @@ -153,7 +153,7 @@ class KSyncObject { void ClearStale(KSyncEntry *entry); // Big lock on the tree // TODO: Make this more fine granular - tbb::recursive_mutex lock_; + mutable tbb::recursive_mutex lock_; void ChangeKey(KSyncEntry *entry, uint32_t arg); virtual void UpdateKey(KSyncEntry *entry, uint32_t arg) { } diff --git a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ha_stale_dev_vn.cc b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ha_stale_dev_vn.cc index 693b226f936..388179fbdcc 100644 --- a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ha_stale_dev_vn.cc +++ b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ha_stale_dev_vn.cc @@ -141,10 +141,10 @@ void HaStaleDevVnEntry::TriggerAck(HaStaleL2RouteTable *table) { assert(old_l2_table_ == table); if (l2_table_ != NULL) { old_l2_table_ = NULL; - object->NotifyEvent(this, KSyncEntry::ADD_ACK); + object->SafeNotifyEvent(this, KSyncEntry::ADD_ACK); } else { old_l2_table_ = NULL; - object->NotifyEvent(this, KSyncEntry::DEL_ACK); + object->SafeNotifyEvent(this, KSyncEntry::DEL_ACK); } } diff --git a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/logical_switch_ovsdb.cc b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/logical_switch_ovsdb.cc index 270b162ae48..b3cbab509f3 100644 --- a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/logical_switch_ovsdb.cc +++ b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/logical_switch_ovsdb.cc @@ -147,7 +147,7 @@ void LogicalSwitchEntry::DeleteMsg(struct ovsdb_idl_txn *txn) { void LogicalSwitchEntry::OvsdbChange() { if (!IsResolved()) - table_->NotifyEvent(this, KSyncEntry::ADD_CHANGE_REQ); + table_->SafeNotifyEvent(this, KSyncEntry::ADD_CHANGE_REQ); } const std::string &LogicalSwitchEntry::name() const { diff --git a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/multicast_mac_local_ovsdb.cc b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/multicast_mac_local_ovsdb.cc index 3eba18ebdfc..b2225469dfb 100644 --- a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/multicast_mac_local_ovsdb.cc +++ b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/multicast_mac_local_ovsdb.cc @@ -62,7 +62,7 @@ void MulticastMacLocalEntry::EvaluateVrfDependency(VrfEntry *vrf) { OnVrfDelete(); // Issue a ADD_CHANGE_REQ to push entry into defer state. // Whenever VN entry is back this will be updated. - table_->NotifyEvent(this, KSyncEntry::ADD_CHANGE_REQ); + table_->SafeNotifyEvent(this, KSyncEntry::ADD_CHANGE_REQ); } } diff --git a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ovsdb_entry.cc b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ovsdb_entry.cc index 1b796d1b058..1d1f9223c71 100644 --- a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ovsdb_entry.cc +++ b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ovsdb_entry.cc @@ -47,7 +47,7 @@ void OvsdbEntry::Ack(bool success) { } OvsdbObject *object = static_cast(GetObject()); - object->NotifyEvent(this, ack_event_); + object->SafeNotifyEvent(this, ack_event_); } OvsdbDBEntry::OvsdbDBEntry(OvsdbDBObject *table) : KSyncDBEntry(), table_(table), @@ -206,9 +206,9 @@ void OvsdbDBEntry::Ack(bool success) { if (success) { if (IsDelAckWaiting()) - object->NotifyEvent(this, KSyncEntry::DEL_ACK); + object->SafeNotifyEvent(this, KSyncEntry::DEL_ACK); else if (IsAddChangeAckWaiting()) - object->NotifyEvent(this, KSyncEntry::ADD_ACK); + object->SafeNotifyEvent(this, KSyncEntry::ADD_ACK); else delete this; } else { @@ -225,7 +225,7 @@ void OvsdbDBEntry::Ack(bool success) { } if (trigger_ack) { - object->NotifyEvent(this, KSyncEntry::DEL_ACK); + object->SafeNotifyEvent(this, KSyncEntry::DEL_ACK); } } else if (IsAddChangeAckWaiting()) { OVSDB_TRACE(Error, "Add Transaction failed for " + ToString()); @@ -236,7 +236,7 @@ void OvsdbDBEntry::Ack(bool success) { object->Change(this); } - object->NotifyEvent(this, KSyncEntry::ADD_ACK); + object->SafeNotifyEvent(this, KSyncEntry::ADD_ACK); } else { // should never happen assert(0); diff --git a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ovsdb_object.cc b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ovsdb_object.cc index cbbb52b4856..da9ed1c82d0 100644 --- a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ovsdb_object.cc +++ b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/ovsdb_object.cc @@ -128,7 +128,7 @@ void OvsdbDBObject::NotifyDeleteOvsdb(OvsdbDBEntry *key, // we were not waiting for delete to happen, state for // OVSDB server and client mismatch happend. // Trigger Add/Change Req on entry to resync - NotifyEvent(entry, KSyncEntry::ADD_CHANGE_REQ); + SafeNotifyEvent(entry, KSyncEntry::ADD_CHANGE_REQ); } } } diff --git a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/unicast_mac_remote_ovsdb.cc b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/unicast_mac_remote_ovsdb.cc index 3b20792479d..74fcef97d93 100644 --- a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/unicast_mac_remote_ovsdb.cc +++ b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/unicast_mac_remote_ovsdb.cc @@ -167,7 +167,7 @@ void UnicastMacRemoteEntry::DeleteMsg(struct ovsdb_idl_txn *txn) { void UnicastMacRemoteEntry::OvsdbChange() { if (!IsResolved()) - table_->NotifyEvent(this, KSyncEntry::ADD_CHANGE_REQ); + table_->SafeNotifyEvent(this, KSyncEntry::ADD_CHANGE_REQ); } bool UnicastMacRemoteEntry::Sync(DBEntry *db_entry) { diff --git a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/vrf_ovsdb.cc b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/vrf_ovsdb.cc index 678461147e7..91c83662828 100644 --- a/src/vnsw/agent/ovs_tor_agent/ovsdb_client/vrf_ovsdb.cc +++ b/src/vnsw/agent/ovs_tor_agent/ovsdb_client/vrf_ovsdb.cc @@ -100,7 +100,7 @@ void VrfOvsdbEntry::TriggerAck(UnicastMacRemoteTable *table) { OvsdbDBObject *object = static_cast(GetObject()); assert(route_table_ == table); route_table_ = NULL; - object->NotifyEvent(this, KSyncEntry::DEL_ACK); + object->SafeNotifyEvent(this, KSyncEntry::DEL_ACK); } VrfOvsdbObject::VrfOvsdbObject(OvsdbClientIdl *idl) : OvsdbDBObject(idl, true) {