Skip to content

Commit

Permalink
* Back off publishing of preference for a path if it's flapping
Browse files Browse the repository at this point in the history
If a given path flaps more than 5 times within a 5-second period,
the agent backs off for 5 seconds and retries the path update
once that period has elapsed. Similarly, if the path flaps
again, the agent backs off exponentially, up to a maximum
of 100 seconds.
Test case for same.
Closes-bug:#1461774,#1373135

Change-Id: I7429dc01354784baf0090c13b0e94e9ae990bcf0
(cherry picked from commit d70aa21)
  • Loading branch information
naveen-n committed Jun 17, 2015
1 parent 6e386f8 commit 88edb11
Show file tree
Hide file tree
Showing 5 changed files with 356 additions and 21 deletions.
1 change: 1 addition & 0 deletions src/vnsw/agent/oper/agent.sandesh
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,7 @@ traceobject sandesh PathPreferenceTrace {
3: i32 preference;
4: i32 sequence;
5: string state;
6: i32 retry_timeout;
}

request sandesh MirrorCreateReq {
Expand Down
158 changes: 140 additions & 18 deletions src/vnsw/agent/oper/path_preference.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ struct EvActiveActiveMode : sc::event<EvActiveActiveMode> {
}
};

// State-machine event posted when the control-node chosen path and the
// local path are the same (same sequence number and same next hop).
struct EvControlNodeInSync : sc::event<EvControlNodeInSync> {
    EvControlNodeInSync() {}

    // Human-readable label for this event.
    static const char *Name() { return "Control node route in sync"; }
};

struct Init : public sc::state<Init, PathPreferenceSM> {
typedef mpl::list<
sc::custom_reaction<EvStart>
Expand All @@ -81,7 +89,8 @@ struct WaitForTraffic : sc::state<WaitForTraffic, PathPreferenceSM> {
sc::custom_reaction<EvTrafficSeen>,
sc::custom_reaction<EvSeqChange>,
sc::custom_reaction<EvWaitForTraffic>,
sc::custom_reaction<EvActiveActiveMode>
sc::custom_reaction<EvActiveActiveMode>,
sc::custom_reaction<EvControlNodeInSync>
> reactions;

WaitForTraffic(my_context ctx) : my_base(ctx) {
Expand Down Expand Up @@ -111,21 +120,30 @@ struct WaitForTraffic : sc::state<WaitForTraffic, PathPreferenceSM> {
sc::result react(const EvActiveActiveMode &event) {
return transit<ActiveActiveState>();
}

sc::result react(const EvControlNodeInSync &event) {
return discard_event();
}
};

struct TrafficSeen : sc::state<TrafficSeen, PathPreferenceSM> {
typedef mpl::list<
sc::custom_reaction<EvTrafficSeen>,
sc::custom_reaction<EvSeqChange>,
sc::custom_reaction<EvWaitForTraffic>,
sc::custom_reaction<EvActiveActiveMode>
sc::custom_reaction<EvActiveActiveMode>,
sc::custom_reaction<EvControlNodeInSync>
> reactions;

TrafficSeen(my_context ctx) : my_base(ctx) {
PathPreferenceSM *state_machine = &context<PathPreferenceSM>();
//Enqueue a route change
if (state_machine->wait_for_traffic() == true) {
uint32_t seq = state_machine->max_sequence();
state_machine->UpdateFlapTime();
if (state_machine->flap_count() == 0) {
state_machine->DecreaseRetryTimeout();
}
uint32_t seq = state_machine->max_sequence();
state_machine->set_wait_for_traffic(false);
seq++;
state_machine->set_max_sequence(seq);
Expand All @@ -148,22 +166,38 @@ struct TrafficSeen : sc::state<TrafficSeen, PathPreferenceSM> {
PathPreferenceSM *state_machine = &context<PathPreferenceSM>();
if (event.sequence_ > state_machine->sequence()) {
state_machine->set_max_sequence(event.sequence_);
return transit<WaitForTraffic>();
if (state_machine->IsPathFlapping()) {
//If path is continuously flapping
//delay withdrawing of route
if (state_machine->RetryTimerRunning() == false) {
state_machine->IncreaseRetryTimeout();
state_machine->StartRetryTimer();
state_machine->Log("Back off and retry update");
}
return discard_event();
}
}
return discard_event();
return transit<WaitForTraffic>();
}

sc::result react(const EvActiveActiveMode &event) {
return transit<ActiveActiveState>();
}

sc::result react(const EvControlNodeInSync &event) {
PathPreferenceSM *state_machine = &context<PathPreferenceSM>();
state_machine->Log("in sync with control-node");
return discard_event();
}
};

struct ActiveActiveState : sc::state<ActiveActiveState, PathPreferenceSM> {
typedef mpl::list<
sc::custom_reaction<EvTrafficSeen>,
sc::custom_reaction<EvSeqChange>,
sc::custom_reaction<EvWaitForTraffic>,
sc::custom_reaction<EvActiveActiveMode>
sc::custom_reaction<EvActiveActiveMode>,
sc::custom_reaction<EvControlNodeInSync>
> reactions;

ActiveActiveState(my_context ctx) : my_base(ctx) {
Expand Down Expand Up @@ -197,17 +231,95 @@ struct ActiveActiveState : sc::state<ActiveActiveState, PathPreferenceSM> {
sc::result react(const EvActiveActiveMode &event) {
return discard_event();
}
};


sc::result react(const EvControlNodeInSync &event) {
return discard_event();
}
};

// Builds the per-path preference state machine for route `rt` learnt from
// `peer`, then starts it by posting EvStart.
//
// Every data member is initialised here, in declaration order:
//  - path_preference_ starts at sequence 0, LOW preference, and both boolean
//    flags false;
//  - timeout_ starts at the minimum back-off interval;
//  - last_high_priority_change_at_ starts at 0 so that the first call to
//    UpdateFlapTime() sees a very large elapsed time and does not count the
//    first transition as a flap (previously this member was left
//    uninitialised and UpdateFlapTime() read an indeterminate value);
//  - seen_ starts false instead of being left indeterminate.
PathPreferenceSM::PathPreferenceSM(Agent *agent, const Peer *peer,
    AgentRoute *rt): agent_(agent), peer_(peer), rt_(rt),
    path_preference_(0, PathPreference::LOW, false, false), max_sequence_(0),
    seen_(false), timer_(NULL), timeout_(kMinInterval),
    last_high_priority_change_at_(0), flap_count_(0) {
    initiate();
    process_event(EvStart());
}

// Tears down the retry timer, if one was ever created: cancel it first and
// then hand it back to the TimerManager for deletion.
PathPreferenceSM::~PathPreferenceSM() {
    if (timer_ == NULL) {
        return;
    }
    timer_->Cancel();
    TimerManager::DeleteTimer(timer_);
    timer_ = NULL;
}

// Retry-timer callback (bound in StartRetryTimer()).
// Clears the flap counter and then posts EvWaitForTraffic to the state
// machine so the deferred update is attempted again.
bool PathPreferenceSM::Retry() {
flap_count_ = 0;
process_event(EvWaitForTraffic());
// NOTE(review): presumably `false` tells the Timer not to re-arm itself;
// confirm against the Timer callback contract.
return false;
}

// Arms the back-off timer so that Retry() is invoked after timeout_.
// The Timer object is created lazily on the first call and reused afterwards.
void PathPreferenceSM::StartRetryTimer() {
    if (!timer_) {
        const int task_id =
            TaskScheduler::GetInstance()->GetTaskId("db::DBTable");
        // NOTE(review): the timer label below reads like a leftover from
        // another timer; it is kept unchanged because it is a runtime string.
        timer_ = TimerManager::CreateTimer(
                     *(agent_->event_manager())->io_service(),
                     "Stale cleanup timer", task_id, 0, false);
    }
    timer_->Start(timeout_, boost::bind(&PathPreferenceSM::Retry, this));
}

// Cancels the retry timer; does nothing when no timer has been created yet.
void PathPreferenceSM::CancelRetryTimer() {
    if (timer_ != NULL) {
        timer_->Cancel();
    }
}

// Returns true only when a retry timer exists and reports itself as running.
bool PathPreferenceSM::RetryTimerRunning() {
    return (timer_ != NULL) && timer_->running();
}

// Doubles the retry timeout, capping the result at kMaxInterval.
void PathPreferenceSM::IncreaseRetryTimeout() {
    uint32_t doubled = timeout_ * 2;
    if (doubled > kMaxInterval) {
        doubled = kMaxInterval;
    }
    timeout_ = doubled;
}

// Halves the retry timeout, never letting it drop below kMinInterval.
void PathPreferenceSM::DecreaseRetryTimeout() {
    uint32_t halved = timeout_ / 2;
    if (halved < kMinInterval) {
        halved = kMinInterval;
    }
    timeout_ = halved;
}

// Records a high-priority transition and updates the flap counter.
//
// The time elapsed since the previous transition is compared against
// (timeout_ + kMinInterval): a transition inside that window counts as one
// more flap, a transition outside it resets the counter to zero.
//
// The clock is sampled once so the stored timestamp is exactly the one used
// for the elapsed-time computation. The elapsed value is UTCTimestampUsec()
// divided by 1000, i.e. presumably milliseconds (assuming the helper returns
// microseconds, as its name suggests), which matches the millisecond-scaled
// kMinInterval / timeout_ values it is compared with.
void PathPreferenceSM::UpdateFlapTime() {
    const uint64_t now_usec = UTCTimestampUsec();
    const uint64_t elapsed_msec =
        (now_usec - last_high_priority_change_at_) / 1000;

    //Update last flap time
    last_high_priority_change_at_ = now_usec;

    if (elapsed_msec < timeout_ + kMinInterval) {
        flap_count_++;
    } else {
        flap_count_ = 0;
    }
}

// A path is treated as flapping once its flap counter has reached
// kMaxFlapCount.
bool PathPreferenceSM::IsPathFlapping() const {
    return flap_count_ >= kMaxFlapCount;
}

void PathPreferenceSM::Process() {
uint32_t max_sequence = 0;
const AgentPath *best_path = NULL;
Expand All @@ -225,6 +337,9 @@ void PathPreferenceSM::Process() {
it != rt_->GetPathList().end(); ++it) {
const AgentPath *path =
static_cast<const AgentPath *>(it.operator->());
if (path == local_path) {
continue;
}
//Get best preference and sequence no from all BGP peer
if (max_sequence < path->sequence()) {
max_sequence = path->sequence();
Expand All @@ -236,24 +351,31 @@ void PathPreferenceSM::Process() {
return;
}

if (max_sequence > sequence()) {
process_event(EvSeqChange(max_sequence));
} else if (sequence() == max_sequence &&
best_path->ComputeNextHop(agent_) != local_path->ComputeNextHop(agent_)) {
//Control node chosen path and local path are different
//move to wait for traffic state
process_event(EvWaitForTraffic());
} else if (ecmp() == true) {
if (ecmp() == true) {
path_preference_.set_ecmp(local_path->path_preference().ecmp());
//Route transition from ECMP to non ECMP,
//move to wait for traffic state
process_event(EvWaitForTraffic());
return;
}

if (max_sequence > sequence()) {
process_event(EvSeqChange(max_sequence));
} else if (sequence() == max_sequence &&
best_path->ComputeNextHop(agent_) ==
local_path->ComputeNextHop(agent_)) {
//Control node chosen path and local path are same
process_event(EvControlNodeInSync());
} else if (sequence() == max_sequence &&
best_path->ComputeNextHop(agent_) !=
local_path->ComputeNextHop(agent_)) {
process_event(EvWaitForTraffic());
}
}

// Emits a PATH_PREFERENCE_TRACE record for this route, carrying the VRF name,
// the route address string, the current preference and sequence, and the
// caller-supplied `state` text.
// NOTE(review): the two argument lines below are the before/after forms of
// the same line in this change; the later form additionally passes timeout().
void PathPreferenceSM::Log(std::string state) {
PATH_PREFERENCE_TRACE(rt_->vrf()->GetName(), rt_->GetAddressString(),
preference(), sequence(), state);
preference(), sequence(), state, timeout());
}

void PathPreferenceSM::EnqueuePathChange() {
Expand Down
31 changes: 31 additions & 0 deletions src/vnsw/agent/oper/path_preference.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,22 @@ do { \
class PathPreferenceSM:
public sc::state_machine<PathPreferenceSM, Init> {
public:
static const uint32_t kMinInterval = 5 * 1000;
static const uint32_t kMaxInterval = 100 * 1000;
static const uint32_t kMaxFlapCount = 5;
PathPreferenceSM(Agent *agent, const Peer *peer,
AgentRoute *rt);
~PathPreferenceSM();
uint32_t sequence() const {return path_preference_.sequence();}
uint32_t preference() const {return path_preference_.preference();}
bool wait_for_traffic() const {return path_preference_.wait_for_traffic();}
bool ecmp() const {return path_preference_.ecmp();}
uint32_t timeout() const { return timeout_;}

uint64_t last_high_priority_change_at() const {
return last_high_priority_change_at_;
}
uint32_t flap_count() const { return flap_count_;}

void set_sequence(uint32_t seq_no) {
path_preference_.set_sequence(seq_no);
Expand All @@ -59,19 +69,40 @@ class PathPreferenceSM:
max_sequence_ = seq;
}

void set_timeout(uint32_t timeout) {
timeout_ = timeout;
}

void set_last_high_priority_change_at(uint64_t timestamp) {
last_high_priority_change_at_ = timestamp;
}

bool seen() { return seen_; }
uint32_t max_sequence() const { return max_sequence_;}
void Process();
void Delete();
void Log(std::string state);
void EnqueuePathChange();
bool Retry();
void StartRetryTimer();
void CancelRetryTimer();
bool RetryTimerRunning();
void IncreaseRetryTimeout();
void DecreaseRetryTimeout();
bool IsPathFlapping() const;
bool IsPathStable() const;
void UpdateFlapTime();
private:
Agent *agent_;
const Peer *peer_;
AgentRoute *rt_;
PathPreference path_preference_;
uint32_t max_sequence_;
bool seen_;
Timer *timer_;
uint32_t timeout_;
uint64_t last_high_priority_change_at_;
uint32_t flap_count_;
};

//Per Route state machine containing a map for all
Expand Down

0 comments on commit 88edb11

Please sign in to comment.