/
bgp_peer_close.cc
330 lines (276 loc) · 11.2 KB
/
bgp_peer_close.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
/*
* Copyright (c) 2013 Juniper Networks, Inc. All rights reserved.
*/
#include "bgp/bgp_peer_close.h"
#include "bgp/bgp_log.h"
#include "bgp/bgp_peer_membership.h"
#include "bgp/bgp_route.h"
#include "bgp/bgp_server.h"
//
// Construct the close manager for |peer| and keep a back reference to it.
//
// All close / graceful restart state flags start out cleared. The stale timer
// is created only when the peer is attached to a server, since the timer is
// created on that server's io service; otherwise stale_timer_ remains NULL.
//
PeerCloseManager::PeerCloseManager(IPeer *peer)
    : peer_(peer),
      close_in_progress_(false),
      close_request_pending_(false),
      config_deleted_(false),
      stale_timer_(NULL),
      stale_timer_running_(false),
      start_stale_timer_(false) {
    // No server, no io service to create the timer on.
    if (!peer->server()) {
        return;
    }

    stale_timer_ = TimerManager::CreateTimer(*peer->server()->ioservice(),
                                             "Graceful Restart StaleTimer");
}
//
// Hand the stale timer back to TimerManager for deletion. stale_timer_ is
// still NULL here if the constructor found no server for the peer.
//
PeerCloseManager::~PeerCloseManager() {
TimerManager::DeleteTimer(stale_timer_);
}
//
// Arm the graceful restart stale timer as part of RibIn staling during peer
// closure.
//
// The timer is started with kDefaultGracefulRestartTime * 1000 as its
// interval (presumably seconds converted to milliseconds - confirm against
// Timer::Start) and StaleTimerCallback bound as its callback. This method
// does not return a value.
//
// NOTE(review): stale_timer_ is NULL when the peer had no server at
// construction time and is dereferenced unconditionally here - confirm that
// callers cannot reach this method in that case.
//
void PeerCloseManager::StartStaleTimer() {
// Launch a timer to flush either the peer or the stale routes
// TODO(ananth): Use timer value from configuration
stale_timer_->Start(PeerCloseManager::kDefaultGracefulRestartTime * 1000,
boost::bind(&PeerCloseManager::StaleTimerCallback, this));
}
//
// Concurrency: Runs in the context of the BGP peer rib membership task.
//
// Callback handed to the bgp peer rib membership manager by
// StaleTimerCallback() to select the RibIn action for |peer_rib|.
//
//   - peer_rib still stale: the peer did not come back up, or did not
//     register for this table after coming back up. Delete the RibIn
//     (RIBIN_DELETE).
//   - otherwise: the peer registered with this table again. Sweep the stale
//     paths that did not reappear in the new session (RIBIN_SWEEP).
//
int PeerCloseManager::GetCloseTypeForTimerCallback(IPeerRib *peer_rib) {
    return peer_rib->IsStale() ? MembershipRequest::RIBIN_DELETE
                               : MembershipRequest::RIBIN_SWEEP;
}
//
// Concurrency: Runs in the context of the BGP peer rib membership task.
//
// Completion callback passed to the membership manager by
// StaleTimerCallback() when the peer is ready again. Nothing needs to be done
// once the walk for a table completes, so the body is intentionally empty
// (no log message is emitted at present). Both arguments are unused.
//
void PeerCloseManager::SweepComplete(IPeer *ipeer, BgpTable *table) {
}
//
// Route stale timer callback.
//
// Asks the membership manager to walk this peer's ribs, with
// GetCloseTypeForTimerCallback() choosing per rib between sweeping stale
// paths and deleting the RibIn. The completion callback depends on whether
// the peer is ready again:
//   - ready:     SweepComplete()
//   - not ready: CloseComplete(from_timer = true, gr_cancelled = false)
//
// Always returns false (presumably so that the timer is not re-armed -
// confirm against the Timer callback contract).
//
bool PeerCloseManager::StaleTimerCallback() {
    // Protect this method from possible parallel new close request
    tbb::recursive_mutex::scoped_lock lock(mutex_);

    if (peer_->IsReady()) {
        // Peer is back up: walk completion needs no further action.
        peer_->server()->membership_mgr()->UnregisterPeer(peer_,
            boost::bind(&PeerCloseManager::GetCloseTypeForTimerCallback, this,
                        _1),
            boost::bind(&PeerCloseManager::SweepComplete, this, _1, _2));
    } else {
        // Peer is still down: finish through the regular close completion
        // path, flagged as originating from the timer.
        peer_->server()->membership_mgr()->UnregisterPeer(peer_,
            boost::bind(&PeerCloseManager::GetCloseTypeForTimerCallback, this,
                        _1),
            boost::bind(&PeerCloseManager::CloseComplete, this, _1, _2, true,
                        false));
    }

    // Timer callback is complete. Reset the appropriate flags
    stale_timer_running_ = false;
    start_stale_timer_ = false;
    stale_timer_->Cancel();
    return false;
}
//
// Report whether a close is currently in progress for this peer. The flag is
// read while holding mutex_, the same lock that Close() and CloseComplete()
// hold when they update it.
//
bool PeerCloseManager::IsCloseInProgress() {
tbb::recursive_mutex::scoped_lock lock(mutex_);
return close_in_progress_;
}
//
// Concurrency: Runs in the context of the BGP peer rib membership task.
//
// Invoked once the walk of this peer's RibIns and RibOuts is complete. Clears
// the in-progress flag, runs the peer specific completion hook and then
// either arms the stale timer or re-issues a close that was requested while
// this one was running.
//
// ipeer         Peer the walk was done for; only queried for IsXmppPeer()
// table         Unused
// from_timer    Forwarded to IPeerClose::CloseComplete()
// gr_cancelled  Forwarded to IPeerClose::CloseComplete()
//
void PeerCloseManager::CloseComplete(IPeer *ipeer, BgpTable *table,
                                     bool from_timer, bool gr_cancelled) {
    tbb::recursive_mutex::scoped_lock lock(mutex_);

    BGP_LOG_PEER(Event, peer_, SandeshLevel::SYS_INFO, BGP_LOG_FLAG_ALL,
                 BGP_PEER_DIR_NA, "Close procedure completed");

    close_in_progress_ = false;

    // Capture these before the peer specific hook below is invoked.
    bool had_pending_close = close_request_pending_;
    bool xmpp_peer = ipeer->IsXmppPeer();

    // Do any peer specific close actions
    if (peer_->peer_close()->CloseComplete(from_timer, gr_cancelled)) {
        // Per the original author's note, the peer is deleted at this point.
        // A delete request may have arrived while the previous close was
        // still cleaning up; if so, post the close again. Xmpp peers are not
        // created and deleted off configuration, so they are excluded.
        if (!xmpp_peer && had_pending_close) {
            close_request_pending_ = false;
            Close();
        }
        return;
    }

    // The hook returned false. If the walk marked any RibIn stale, wait for
    // some time hoping for the peer (and the paths) to come back up.
    if (start_stale_timer_) {
        StartStaleTimer();
        stale_timer_running_ = true;
    }
}
//
// Select the membership action mask for |peer_rib| at the start of a close
// (as opposed to the graceful restart timer callback, where the rib is walked
// again to sweep the routes).
//
// Returns a bitwise OR of MembershipRequest action values:
//   - RIBOUT_DELETE if the RibOut is registered
//   - RIBIN_DELETE  if the stale timer is already running (a second close
//                   before the previous restart completed), or if the RibIn
//                   is registered and the close is not graceful
//   - RIBIN_STALE   if the RibIn is registered and the close is graceful; the
//                   rib is marked stale and start_stale_timer_ is set so that
//                   a timer is started once this close completes
//
// NOTE(review): stale_timer_running_ is cleared on the first rib for which
// this callback runs, so later ribs of the same peer do not take the abort
// path above - confirm this is intended.
//
int PeerCloseManager::GetActionAtStart(IPeerRib *peer_rib) {
    int action_mask = MembershipRequest::INVALID;

    if (peer_rib->IsRibOutRegistered()) {
        action_mask |= static_cast<int>(MembershipRequest::RIBOUT_DELETE);
    }

    // Graceful restart already under way: abort it and delete the routes.
    if (stale_timer_running_) {
        stale_timer_running_ = false;
        return action_mask | static_cast<int>(MembershipRequest::RIBIN_DELETE);
    }

    // Nothing to do on the RibIn side if it was never registered.
    if (!peer_rib->IsRibInRegistered()) {
        return action_mask;
    }

    // Non graceful close: no need to retain the RibIn.
    if (!peer_->peer_close()->IsCloseGraceful()) {
        return action_mask | MembershipRequest::RIBIN_DELETE;
    }

    // Graceful close: keep the paths around as stale and note down that a
    // timer must be started after this close process is complete.
    peer_rib->SetStale();
    start_stale_timer_ = true;
    return action_mask | MembershipRequest::RIBIN_STALE;
}
//
// Start deleting all Ribs of this peer. To be called during the peer close
// process of both BgpPeers and XmppPeers.
//
// If a close is already in progress the request is not acted upon; it is
// only remembered (close_request_pending_) when the peer reports the close as
// graceful, so that CloseComplete() can re-issue it. Otherwise the peer
// specific CustomClose() is invoked, a running stale timer is cancelled, and
// the membership manager is asked to unregister the peer with
// GetActionAtStart() choosing the per rib action and CloseComplete() as the
// completion callback.
//
void PeerCloseManager::Close() {
    tbb::recursive_mutex::scoped_lock lock(mutex_);

    IPeerClose *peer_close = peer_->peer_close();

    // If the close is already in progress, ignore this duplicate request
    if (close_in_progress_) {
        if (peer_close->IsCloseGraceful()) {
            close_request_pending_ = true;
        }
        BGP_LOG_PEER(Event, peer_, SandeshLevel::SYS_INFO, BGP_LOG_FLAG_ALL,
                     BGP_PEER_DIR_NA, "Close procedure already in progress");
        return;
    }

    BGP_LOG_PEER(Event, peer_, SandeshLevel::SYS_INFO, BGP_LOG_FLAG_ALL,
                 BGP_PEER_DIR_NA, "Close procedure initiated");
    close_in_progress_ = true;

    // Call IPeer specific close()
    peer_close->CustomClose();

    // If stale timer is already running, cancel the timer and do hard close
    bool gr_cancelled = false;
    if (stale_timer_running_) {
        stale_timer_->Cancel();
        gr_cancelled = true;
    }

    // Start process to delete this peer's RibIns and RibOuts. Peer can be
    // deleted only after these (asynchronous) activities are complete
    peer_->server()->membership_mgr()->UnregisterPeer(peer_,
        boost::bind(&PeerCloseManager::GetActionAtStart, this, _1),
        boost::bind(&PeerCloseManager::CloseComplete, this, _1, _2, false,
                    gr_cancelled));
}
//
// For graceful-restart, we take mark-and-sweep approach instead of directly
// deleting the paths. In the first walk, local-preference is lowered so that
// the paths are least preferred and they are marked stale. After some time, if
// the peer session does not come back up, we delete all the paths and the peer
// itself. If the session did come back up, we flush only those paths that were
// not learned again in the new session.
//
// Concurrency: Runs in the context of the DB Walker task launched by peer rib
// membership manager
//
// DBWalker callback routine for each of the RibIn prefix.
//
// root         Table partition the route belongs to; passed to InputCommon()
// rt           Route whose paths are examined
// table        Table that receives the resulting modify/delete requests
// action_mask  Bitmask of MembershipRequest actions; only RIBIN_STALE,
//              RIBIN_SWEEP and RIBIN_DELETE are considered here. If none of
//              them is set, or the masked value is not exactly one of them,
//              the route is left untouched.
//
void PeerCloseManager::ProcessRibIn(DBTablePartBase *root, BgpRoute *rt,
                                    BgpTable *table, int action_mask) {
    DBRequest::DBOperation oper;
    BgpAttrPtr attrs;

    // Look for the flags that we care about
    MembershipRequest::Action action =
        static_cast<MembershipRequest::Action>(action_mask &
            (MembershipRequest::RIBIN_STALE |
             MembershipRequest::RIBIN_SWEEP |
             MembershipRequest::RIBIN_DELETE));
    if (action == MembershipRequest::INVALID) return;

    // Process all paths sourced from this peer_. Multiple paths could exist
    // in ecmp cases. |next| is advanced before the current path is handed to
    // InputCommon() so that the walk does not depend on |it| afterwards.
    for (Route::PathList::iterator it = rt->GetPathList().begin(), next = it;
         it != rt->GetPathList().end(); it = next) {
        ++next;
        BgpPath *path = static_cast<BgpPath *>(it.operator->());

        // Skip paths from other peers.
        if (path->GetPeer() != peer_)
            continue;

        // Skip resolved paths - PathResolver is responsible for them.
        if (path->IsResolved())
            continue;

        // Skip secondary paths.
        if (dynamic_cast<BgpSecondaryPath *>(path))
            continue;

        switch (action) {
            case MembershipRequest::RIBIN_SWEEP:
                // Only stale paths must be deleted. A path that is no longer
                // stale is kept, but the remaining paths of this route still
                // have to be examined, hence continue rather than return.
                if (!path->IsStale()) {
                    continue;
                }
                oper = DBRequest::DB_ENTRY_DELETE;
                attrs = NULL;
                break;

            case MembershipRequest::RIBIN_DELETE:
                // This path must be deleted. Hence attr is not required
                oper = DBRequest::DB_ENTRY_DELETE;
                attrs = NULL;
                break;

            case MembershipRequest::RIBIN_STALE:
                // This path must be marked for staling. Update the local
                // preference and update the route accordingly
                oper = DBRequest::DB_ENTRY_ADD_CHANGE;

                // Replace the local preference with 1 so that this path is
                // least preferred
                // TODO(ananth): Check for the right local-pref value to use
                attrs = peer_->server()->attr_db()->
                    ReplaceLocalPreferenceAndLocate(path->GetAttr(), 1);
                path->SetStale();
                break;

            default:
                // More than one RibIn action bit set: nothing to do.
                return;
        }

        // Feed the route modify/delete request to the table input process
        table->InputCommon(root, rt, path, peer_, NULL, oper, attrs,
                           path->GetPathId(), path->GetFlags(),
                           path->GetLabel());
    }
}