Implement heuristic to disable SchedulingGroup splitting
It's not worth trying to split a group with a very large number of
(ribout, peer) members.  The chances that such a group can be split
are pretty low and the amount of effort spent in figuring it out is
quite large.

Implement a heuristic/hack to make a group split-ineligible once it
has more than a certain number of members. This is a sticky property,
i.e. the group can never be split once the property is set.

If/when more parallelism is required on the xmpp send side, it can
be achieved by creating multiple RibOuts for the same tables based
on a hash of the agent name/address.

Change-Id: I601f9aadc4e21a835b7e320f8aae430b5589929d
Partial-Bug: 1461322
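
The last paragraph of the commit message suggests, but does not implement,
a way to get more parallelism on the xmpp send side: create several RibOuts
for the same table and spread agents across them by hashing the agent name.
A minimal sketch of that idea follows; it is not part of this change, and
the RibOutSelector class, the kRibOutFanout constant, and the use of
boost::hash are assumptions made purely for illustration.

// Illustrative sketch only: not part of this commit. Shows how agents
// could be spread across multiple RibOuts for the same table by hashing
// the agent name. RibOutSelector and kRibOutFanout are hypothetical names.
#include <string>
#include <vector>
#include <boost/functional/hash.hpp>

class RibOut;  // The real class lives in src/bgp/bgp_ribout.h.

class RibOutSelector {
public:
    // Assumed fanout; would be tuned to the desired degree of parallelism.
    static const size_t kRibOutFanout = 4;

    explicit RibOutSelector(const std::vector<RibOut *> &ribouts)
        : ribouts_(ribouts) {
    }

    // Pick the RibOut for an agent by hashing its name, so that peers for
    // the same table are partitioned across RibOuts instead of all landing
    // in a single, very large scheduling group.
    RibOut *Select(const std::string &agent_name) const {
        size_t bucket =
            boost::hash<std::string>()(agent_name) % ribouts_.size();
        return ribouts_[bucket];
    }

private:
    std::vector<RibOut *> ribouts_;
};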
Nischal Sheth committed Jun 5, 2015
1 parent 58bd7f4 commit 4783643
Showing 3 changed files with 180 additions and 3 deletions.
30 changes: 30 additions & 0 deletions src/bgp/scheduling_group.cc
@@ -462,6 +462,8 @@ class SchedulingGroup::Worker : public Task {
SchedulingGroup::SchedulingGroup()
: running_(false),
disabled_(false),
split_disabled_(false),
member_count_(0),
worker_task_(NULL) {
if (send_task_id_ == -1) {
TaskScheduler *scheduler = TaskScheduler::GetInstance();
@@ -491,6 +493,7 @@ void SchedulingGroup::Add(RibOut *ribout, IPeerUpdate *peer) {
RibState *rs = rib_state_imap_.Locate(ribout);
PeerState *ps = peer_state_imap_.Locate(peer);
rs->Add(ps);
IncrementMemberCount();
ps->Add(rs);
}

@@ -507,6 +510,7 @@ void SchedulingGroup::Remove(RibOut *ribout, IPeerUpdate *peer) {
assert(rs != NULL);
assert(ps != NULL);
rs->Remove(ps);
DecrementMemberCount();
ps->Remove(rs);
if (rs->empty()) {
rib_state_imap_.Remove(ribout, rs->index());
@@ -657,6 +661,7 @@ void SchedulingGroup::Merge(SchedulingGroup *rhs) {
rs->RibStateMove(prev_rs);
}
rs->Add(ps);
IncrementMemberCount();
ps->RibStateMove(prev_ps, rs->index(), ribiter.index());
}

@@ -717,6 +722,7 @@ void SchedulingGroup::Split(SchedulingGroup *rhs, const RibOutList &rg1,
// for the new PeerState. Need to make sure that we do not lose
// the PeerRibState information from the old PeerState.
rs->Add(ps);
rhs->IncrementMemberCount();
ps->RibStateMove(prev_ps, rs->index(), GetRibOutIndex(ribout));

// Remove the (RibOut, IPeerUpdate) pair from this SchedulingGroup.
@@ -1177,6 +1183,25 @@ void SchedulingGroup::UpdatePeer(IPeerUpdate *peer) {
}
}

//
// Decrement count of members in this SchedulingGroup.
//
void SchedulingGroup::DecrementMemberCount() {
member_count_--;
}

//
// Increment count of members in this SchedulingGroup.
// Mark the SchedulingGroup as split disabled if the count exceeds the split
// threshold. Note that the split disabled property is sticky i.e. it's never
// reset.
//
void SchedulingGroup::IncrementMemberCount() {
if (++member_count_ >= kSplitThreshold) {
split_disabled_ = true;
}
}

//
// Check invariants for the SchedulingGroup.
//
@@ -1338,6 +1363,11 @@ void SchedulingGroupManager::Leave(RibOut *ribout, IPeerUpdate *peer) {
return;
}

// Bail if the group is not eligible to be split.
if (sg->split_disabled()) {
return;
}

// Now check if it's possible to split the group. Given how we define a
// scheduling group and the logic of how joins are handled, we simply need
// to check if the (peer, ribout) combo being removed was the only reason
9 changes: 9 additions & 0 deletions src/bgp/scheduling_group.h
@@ -57,6 +57,8 @@ class GroupPeerSet : public BitSet {
//
class SchedulingGroup {
public:
static const uint32_t kSplitThreshold = 8192;

class RibState;

typedef std::vector<RibOut *> RibOutList;
@@ -99,10 +101,15 @@ class SchedulingGroup {
void GetRibOutList(const RibStateList &rs_list, RibOutList *ro_list) const;
void GetPeerList(PeerList *plist) const;

void DecrementMemberCount();
void IncrementMemberCount();

bool CheckInvariants() const;

void clear();
bool empty() const;
bool split_disabled() const { return split_disabled_; }
uint32_t member_count() const { return member_count_; }

// For unit testing.
void set_disabled(bool disabled);
@@ -162,6 +169,8 @@ class SchedulingGroup {
tbb::mutex mutex_;
bool running_;
bool disabled_;
bool split_disabled_;
uint32_t member_count_;
WorkQueue work_queue_;
Worker *worker_task_;

144 changes: 141 additions & 3 deletions src/bgp/test/scheduling_group_test.cc
@@ -187,36 +187,43 @@ TEST_F(SchedulingGroupManagerTest, TwoTablesTwoPeers1a) {
EXPECT_EQ(1, sgman_.size());
sg = rp1->GetSchedulingGroup();
EXPECT_TRUE(sg != NULL);
EXPECT_EQ(1, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp1.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p1.get()), sg);

Join(rp2.get(), p1.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(2, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp2.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p1.get()), sg);

Join(rp1.get(), p2.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(3, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp1.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p2.get()), sg);

Join(rp2.get(), p2.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(4, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp2.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p2.get()), sg);

Leave(rp1.get(), p1.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(3, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp1.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p1.get()), sg);

Leave(rp2.get(), p1.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(2, sg->member_count());
EXPECT_TRUE(sgman_.PeerGroup(p1.get()) == NULL);
EXPECT_EQ(sgman_.RibOutGroup(rp2.get()), sg);

Leave(rp1.get(), p2.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(1, sg->member_count());
EXPECT_EQ(sgman_.PeerGroup(p2.get()), sg);
EXPECT_TRUE(sgman_.RibOutGroup(rp1.get()) == NULL);

@@ -242,36 +249,43 @@ TEST_F(SchedulingGroupManagerTest, TwoTablesTwoPeers1b) {
EXPECT_EQ(1, sgman_.size());
sg = rp1->GetSchedulingGroup();
EXPECT_TRUE(sg != NULL);
EXPECT_EQ(1, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp1.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p1.get()), sg);

Join(rp1.get(), p2.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(2, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp1.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p2.get()), sg);

Join(rp2.get(), p1.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(3, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp2.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p1.get()), sg);

Join(rp2.get(), p2.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(4, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp2.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p2.get()), sg);

Leave(rp1.get(), p1.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(3, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp1.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p1.get()), sg);

Leave(rp1.get(), p2.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(2, sg->member_count());
EXPECT_TRUE(sgman_.RibOutGroup(rp1.get()) == NULL);
EXPECT_EQ(sgman_.PeerGroup(p2.get()), sg);

Leave(rp2.get(), p1.get());
EXPECT_EQ(1, sgman_.size());
EXPECT_EQ(1, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp2.get()), sg);
EXPECT_TRUE(sgman_.PeerGroup(p1.get()) == NULL);

@@ -297,20 +311,23 @@ TEST_F(SchedulingGroupManagerTest, TwoTablesTwoPeers2) {
EXPECT_EQ(1, sgman_.size());
sg_a = rp1->GetSchedulingGroup();
EXPECT_TRUE(sg_a != NULL);
EXPECT_EQ(1, sg_a->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp1.get()), sg_a);
EXPECT_EQ(sgman_.PeerGroup(p1.get()), sg_a);

Join(rp2.get(), p2.get());
EXPECT_EQ(2, sgman_.size());
sg_b = rp2->GetSchedulingGroup();
EXPECT_TRUE(sg_b != NULL);
EXPECT_EQ(1, sg_b->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp2.get()), sg_b);
EXPECT_EQ(sgman_.PeerGroup(p2.get()), sg_b);

// Merge happens here.
Join(rp2.get(), p1.get());
EXPECT_EQ(1, sgman_.size());
sg = rp1->GetSchedulingGroup();
EXPECT_EQ(3, sg->member_count());
EXPECT_TRUE(sg_a == sg || sg_b == sg);
EXPECT_EQ(sgman_.RibOutGroup(rp1.get()), sg);
EXPECT_EQ(sgman_.RibOutGroup(rp2.get()), sg);
@@ -319,11 +336,15 @@ TEST_F(SchedulingGroupManagerTest, TwoTablesTwoPeers2) {

Join(rp1.get(), p2.get());
EXPECT_EQ(1, sgman_.size());
sg = rp1->GetSchedulingGroup();
EXPECT_EQ(4, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp1.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p2.get()), sg);

Leave(rp1.get(), p2.get());
EXPECT_EQ(1, sgman_.size());
sg = rp1->GetSchedulingGroup();
EXPECT_EQ(3, sg->member_count());
EXPECT_EQ(sgman_.RibOutGroup(rp1.get()), sg);
EXPECT_EQ(sgman_.PeerGroup(p2.get()), sg);

@@ -333,6 +354,8 @@ TEST_F(SchedulingGroupManagerTest, TwoTablesTwoPeers2) {
sg_a = rp1->GetSchedulingGroup();
sg_b = rp2->GetSchedulingGroup();
EXPECT_TRUE(sg_a == sg || sg_b == sg);
EXPECT_EQ(1, sg_a->member_count());
EXPECT_EQ(1, sg_b->member_count());
EXPECT_EQ(sgman_.PeerGroup(p1.get()), sg_a);
EXPECT_EQ(sgman_.PeerGroup(p2.get()), sg_b);

@@ -471,7 +494,7 @@ TEST_F(SchedulingGroupManagerTest, LeaveScaling1) {

// Create large number of additional RibOuts.
vector<RibOut *> ribouts;
for (int idx = 0; idx < 8192; ++idx) {
for (int idx = 0; idx < SchedulingGroup::kSplitThreshold / 2; ++idx) {
RibOut *ribout(new RibOut(inetvpn_table_, &sgman_,
RibExportPolicy(BgpProto::IBGP, RibExportPolicy::BGP, idx, 0)));
ribouts.push_back(ribout);
@@ -518,7 +541,7 @@ TEST_F(SchedulingGroupManagerTest, LeaveScaling2) {

// Create large number of additional ribouts.
vector<RibOut *> ribouts;
for (int idx = 0; idx < 8192; ++idx) {
for (int idx = 0; idx < SchedulingGroup::kSplitThreshold / 2; ++idx) {
RibOut *ribout(new RibOut(inetvpn_table_, &sgman_,
RibExportPolicy(BgpProto::IBGP, RibExportPolicy::BGP, idx, 0)));
ribouts.push_back(ribout);
@@ -565,7 +588,7 @@ TEST_F(SchedulingGroupManagerTest, LeaveScaling3) {

// Create large number of additional RibOuts.
vector<RibOut *> ribouts;
for (int idx = 0; idx < 8192; ++idx) {
for (int idx = 0; idx < SchedulingGroup::kSplitThreshold / 2; ++idx) {
RibOut *ribout(new RibOut(inetvpn_table_, &sgman_,
RibExportPolicy(BgpProto::IBGP, RibExportPolicy::BGP, idx, 0)));
ribouts.push_back(ribout);
@@ -601,6 +624,121 @@ TEST_F(SchedulingGroupManagerTest, LeaveScaling3) {
STLDeleteValues(&peers);
}

//
// Exercise the split disable logic.
//
TEST_F(SchedulingGroupManagerTest, SplitDisabled1) {
// Create 2 peers and join them to a common ribout.
vector<BgpTestPeer *> peers;
BgpTestPeer *test_peer1(new BgpTestPeer());
BgpTestPeer *test_peer2(new BgpTestPeer());
peers.push_back(test_peer1);
peers.push_back(test_peer2);
RibOut ribout_common(inetvpn_table_, &sgman_, RibExportPolicy());
Join(&ribout_common, test_peer1);
Join(&ribout_common, test_peer2);
EXPECT_EQ(1, sgman_.size());

// Create large number of additional ribouts.
vector<RibOut *> ribouts;
for (int idx = 0; idx < SchedulingGroup::kSplitThreshold; ++idx) {
RibOut *ribout(new RibOut(inetvpn_table_, &sgman_,
RibExportPolicy(BgpProto::IBGP, RibExportPolicy::BGP, idx, 0)));
ribouts.push_back(ribout);
}

// Join the test peers to odd/even additional ribouts.
size_t idx = 0;
BOOST_FOREACH(RibOut *ribout, ribouts) {
Join(ribout, peers[idx % 2]);
idx++;
}
EXPECT_EQ(1, sgman_.size());
SchedulingGroup *sg1 = ribouts[0]->GetSchedulingGroup();
EXPECT_TRUE(sg1->split_disabled());
EXPECT_EQ(SchedulingGroup::kSplitThreshold + 2, sg1->member_count());

// Leave test peers from common ribout and verify that group is not split.
Leave(&ribout_common, test_peer1);
Leave(&ribout_common, test_peer2);
EXPECT_EQ(1, sgman_.size());
SchedulingGroup *sg2 = ribouts[0]->GetSchedulingGroup();
EXPECT_TRUE(sg2->split_disabled());
EXPECT_EQ(SchedulingGroup::kSplitThreshold + 0, sg2->member_count());
EXPECT_EQ(sg1, sg2);

// Leave the test peer from odd/even additional ribouts.
idx = 0;
BOOST_FOREACH(RibOut *ribout, ribouts) {
Leave(ribout, peers[idx % 2]);
idx++;
}
EXPECT_EQ(0, sgman_.size());

STLDeleteValues(&ribouts);
STLDeleteValues(&peers);
}

//
// Exercise the split disable logic after a merge.
//
TEST_F(SchedulingGroupManagerTest, SplitDisabled2) {
// Create 2 peers.
vector<BgpTestPeer *> peers;
BgpTestPeer *test_peer1(new BgpTestPeer());
BgpTestPeer *test_peer2(new BgpTestPeer());
peers.push_back(test_peer1);
peers.push_back(test_peer2);

// Create large number of additional RibOuts.
vector<RibOut *> ribouts;
for (int idx = 0; idx < SchedulingGroup::kSplitThreshold; ++idx) {
RibOut *ribout(new RibOut(inetvpn_table_, &sgman_,
RibExportPolicy(BgpProto::IBGP, RibExportPolicy::BGP, idx, 0)));
ribouts.push_back(ribout);
}

// Join the test peers to odd/even additional RibOuts.
size_t idx = 0;
BOOST_FOREACH(RibOut *ribout, ribouts) {
Join(ribout, peers[idx % 2]);
idx++;
}
EXPECT_EQ(2, sgman_.size());
EXPECT_EQ(SchedulingGroup::kSplitThreshold / 2,
ribouts[0]->GetSchedulingGroup()->member_count());

// Join the test peers to a common ribout so that the groups get merged.
RibOut ribout_common(inetvpn_table_, &sgman_, RibExportPolicy());
Join(&ribout_common, test_peer1);
Join(&ribout_common, test_peer2);
EXPECT_EQ(1, sgman_.size());
SchedulingGroup *sg1 = ribouts[0]->GetSchedulingGroup();
EXPECT_TRUE(sg1->split_disabled());
EXPECT_EQ(SchedulingGroup::kSplitThreshold + 2, sg1->member_count());

// Leave test peers from common ribout and verify that group is not split.
Leave(&ribout_common, test_peer1);
Leave(&ribout_common, test_peer2);
EXPECT_EQ(1, sgman_.size());
SchedulingGroup *sg2 = ribouts[0]->GetSchedulingGroup();
EXPECT_TRUE(sg2->split_disabled());
EXPECT_EQ(SchedulingGroup::kSplitThreshold + 0, sg1->member_count());
EXPECT_EQ(sg1, sg2);

// Leave the test peers from odd/even additional RibOuts.
idx = 0;
BOOST_FOREACH(RibOut *ribout, ribouts) {
Leave(ribout, peers[idx % 2]);
idx++;
}
EXPECT_EQ(0, sgman_.size());

// Cleanup.
STLDeleteValues(&ribouts);
STLDeleteValues(&peers);
}

// Parameterize number of entries in the work queue and the order in which
// the entries are added.

