Skip to content

Commit

Permalink
Handle delrequest.lua and uve async request failure in redis
Browse files Browse the repository at this point in the history
contrail-collector executes delrequest.lua script when a Generator
gets disconnected. On a scale setup, some Generators (vrouter-agent,
tor-agent) originate more UVEs and therefore delrequest.lua takes more
than 5 seconds [default lua-time-limit] to complete. If the lua script
runs for more than the configured time limit, then redis returns error
to other requests till the script is completed.

- Removed the chatty log from delrequest.lua as it affects the
performance. sub_del() should be called only for UVEs with
aggtype="stats" fields.
- Added assert if redis returns error for delrequest.lua and async
uveupdate and uvedelete requests.

Change-Id: I797a552ebae11adea49c3b58c9775c0014bd6731
Partial-Bug: #1459769
  • Loading branch information
Sundaresan Rajangam committed Jun 9, 2015
1 parent 3b996d2 commit 6776108
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 9 deletions.
4 changes: 4 additions & 0 deletions src/analytics/OpServerProxy.cc
Expand Up @@ -351,6 +351,10 @@ class OpServerProxy::OpServerImpl {
LOG(DEBUG, "NULL Reply...\n");
return;
}
// If redis returns error for async request, then perhaps it
// is busy executing a script and it has reached the maximum
// execution time limit.
assert(reply->type != REDIS_REPLY_ERROR);

if (rpi) {
rpi->ProcessCallback(reply);
Expand Down
12 changes: 10 additions & 2 deletions src/analytics/delrequest.lua
Expand Up @@ -27,10 +27,17 @@ redis.log(redis.LOG_NOTICE,"DelRequest for "..sm)
local db = tonumber(ARGV[5])
redis.call('select',db)
local typ = redis.call('smembers',"TYPES:"..sm)
--- In a scaled setup, sub_del() can be bottleneck.
--- Therefore, call sub_del() only for types, where aggtype="stats" is specified
--- aggtype="stats" is deprecated and will not be supported from next release
local stat_types = {ModuleServerState=true, ModuleCpuState=true,
BgpRouterState=true, VrouterStatsAgent=true}

for k,v in pairs(typ) do
redis.log(redis.LOG_NOTICE, "Read UVES:"..sm..":"..v)
local lres = redis.call('zrange',"UVES:"..sm..":"..v, 0, -1, "withscores")
local iter = 1
redis.log(redis.LOG_NOTICE, "Delete "..sm..":"..v.." [#"..(#lres/2).."]")
while iter <= #lres do
local deltyp = v
local deluve = lres[iter]
Expand All @@ -47,10 +54,11 @@ for k,v in pairs(typ) do
redis.call('hdel', "KEY2PART:"..sm..":"..deltyp, deluve)
redis.call('srem', "PART2KEY:"..part, sm..":"..deltyp..":"..deluve)
end
redis.log(redis.LOG_NOTICE,"DEL for "..dkey.." part "..part)

local dval = "VALUES:"..deluve..":"..sm..":"..deltyp
sub_del(dval)
if stat_types[deltyp] then
sub_del(dval)
end

local lttt = redis.call('exists', dval)
if lttt == 1 then
Expand Down
19 changes: 12 additions & 7 deletions src/analytics/redis_processor_vizd.cc
Expand Up @@ -203,10 +203,11 @@ RedisProcessorExec::SyncDeleteUVEs(const std::string & redis_ip, unsigned short
const std::string &module, const std::string &instance_id) {

redisContext *c = redisConnect(redis_ip.c_str(), redis_port);
std::string generator(source + ":" + node_type + ":" + module +
":" + instance_id);

if (c->err) {
LOG(ERROR, "No connection for SyncDeleteUVEs " << source << ":" <<
node_type << ":" << module << ":" << instance_id);
LOG(ERROR, "No connection for SyncDeleteUVEs : " << generator);
redisFree(c);
return false;
}
Expand All @@ -233,7 +234,8 @@ RedisProcessorExec::SyncDeleteUVEs(const std::string & redis_ip, unsigned short
module.c_str(), instance_id.c_str(), integerToString(REDIS_DB_UVE).c_str());

if (!reply) {
LOG(INFO, "SyncDeleteUVEs Error : " << c->errstr);
LOG(ERROR, "SyncDeleteUVEs failed for " << generator << " : " <<
c->errstr);
redisFree(c);
return false;
}
Expand All @@ -243,11 +245,14 @@ RedisProcessorExec::SyncDeleteUVEs(const std::string & redis_ip, unsigned short
redisFree(c);
return true;
}
LOG(ERROR, "Unrecognized reponse of type " << reply->type <<
" for SyncDeleteUVEs " << source << ":" << node_type << ":" <<
module << ":" << instance_id);
LOG(ERROR, "Unrecognized response of type " << reply->type <<
" for SyncDeleteUVEs : " << generator);
freeReplyObject(reply);
redisFree(c);
redisFree(c);
// Redis returns error if the time taken to execute the script is
// more than the lua-time-limit configured in redis.conf
// It is not easy to handle this case gracefully, hence assert.
assert(0);
return false;
}

Expand Down

0 comments on commit 6776108

Please sign in to comment.