Skip to content

Commit

Permalink
Alarmgen should ensure that we are always able to catch the gevent kill e…
Browse files Browse the repository at this point in the history
…xception in worker gevents.

After a Kafka error we go to sleep before retrying. That sleep should happen inside a section that can catch exceptions, so that the gevent kill exception is not missed while sleeping.

AddressMismatchCompute and VrouterInterface alarms should not be raised when the VrouterAgent struct is absent.
AddressMismatchControl alarms should not be raised when the BgpRouterState struct is absent.

When we check the object type in api-server (during ContrailConfig insertion), we must replace "-" with "_".
Use "oper" instead of obj_dict to decide whether the object is being deleted. See:
bf6cba2

Change-Id: I2d3f8f66ac8ad8df13db816936e98ed7320c54a0
Closes-Bug: #1533158
Closes-Bug: #1536085
  • Loading branch information
anishmehta committed Feb 2, 2016
1 parent 2195e6c commit 61cafbf
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 36 deletions.
9 changes: 5 additions & 4 deletions src/config/api-server/vnc_cfg_ifmap.py
Expand Up @@ -1678,7 +1678,7 @@ def dbe_uve_trace(self, oper, typ, uuid, obj_dict):
else:
oo['name'] = self.uuid_to_fq_name(uuid)
oo['value'] = obj_dict
oo['type'] = typ
oo['type'] = typ.replace('-', '_')

req_id = get_trace_id()
db_trace = DBRequestTrace(request_id=req_id)
Expand Down Expand Up @@ -1708,10 +1708,11 @@ def dbe_uve_trace(self, oper, typ, uuid, obj_dict):
else:
return

if oo['value']:
cc = ContrailConfig(name=ukey, elements=emap)
if oper.upper() == 'DELETE':
cc = ContrailConfig(name=ukey, elements=emap, deleted=True)
else:
cc = ContrailConfig(name=ukey, elements={}, deleted=True)
cc = ContrailConfig(name=ukey, elements=emap)

cfg_msg = ContrailConfigTrace(data=cc, table=utab,
sandesh=self._sandesh)
cfg_msg.send(sandesh=self._sandesh)
Expand Down
6 changes: 5 additions & 1 deletion src/opserver/partition_handler.py
Expand Up @@ -557,8 +557,12 @@ def msg_handler(self, mlist):

def _run(self):
pcount = 0
pause = False
while True:
try:
if pause:
gevent.sleep(2)
pause = False
self._logger.error("New KafkaClient %s" % self._topic)
self._kfk = KafkaClient(self._brokers , "kc-" + self._topic)
try:
Expand Down Expand Up @@ -618,7 +622,7 @@ def _run(self):
self._logger.error("%s : traceback %s" % \
(messag, traceback.format_exc()))
self.stop_partition()
gevent.sleep(2)
pause = True

self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
partdb = self.stop_partition()
Expand Down
90 changes: 62 additions & 28 deletions src/opserver/plugins/alarm_address_mismatch/main.py
Expand Up @@ -11,36 +11,62 @@ def __init__(self):

def __call__(self, uve_key, uve_data):
or_list = []
try:
uattr = uve_data["ContrailConfig"]["elements"]
if isinstance(uattr,list):
uattr = uattr[0][0]
lval = json.loads(uattr["virtual_router_ip_address"])
except KeyError:
lval = None
and_list = []
trigger = True

try:
rval1 = uve_data["VrouterAgent"]["self_ip_list"]
except KeyError:
rval1 = None
if trigger:
if "ContrailConfig" not in uve_data:
trigger = False
else:
and_list.append(AlarmElement(\
rule=AlarmTemplate(oper="!=",
operand1=Operand1(keys=["ContrailConfig"]),
operand2=Operand2(json_value="null")),
json_operand1_value=json.dumps({})))

try:
rval2 = uve_data["VrouterAgent"]["control_ip"]
except KeyError:
rval2 = None
try:
uattr = uve_data["ContrailConfig"]["elements"]
if isinstance(uattr,list):
uattr = uattr[0][0]
lval = json.loads(uattr["virtual_router_ip_address"])
except KeyError:
lval = None

if not isinstance(rval1,list) or lval not in rval1:
and_list = []
and_list.append(AlarmElement(\
rule=AlarmTemplate(oper="not in",
operand1=Operand1(keys=\
["ContrailConfig","elements","virtual_router_ip_address"],
json=2),
operand2=Operand2(keys=["VrouterAgent","self_ip_list"])),
json_operand1_value=json.dumps(lval),
json_operand2_value=json.dumps(rval1)))
if trigger:
if "VrouterAgent" not in uve_data:
trigger = False
else:
and_list.append(AlarmElement(\
rule=AlarmTemplate(oper="!=",
operand1=Operand1(keys=["VrouterAgent"]),
operand2=Operand2(json_value="null")),
json_operand1_value=json.dumps({})))

if trigger:
try:
rval1 = uve_data["VrouterAgent"]["self_ip_list"]
except KeyError:
rval1 = None

if not isinstance(rval1,list) or lval not in rval1:
and_list.append(AlarmElement(\
rule=AlarmTemplate(oper="not in",
operand1=Operand1(keys=\
["ContrailConfig","elements","virtual_router_ip_address"],
json=2),
operand2=Operand2(keys=["VrouterAgent","self_ip_list"])),
json_operand1_value=json.dumps(lval),
json_operand2_value=json.dumps(rval1)))
else:
trigger = False

if len(and_list) > 0 and lval != rval2:
if trigger:
try:
rval2 = uve_data["VrouterAgent"]["control_ip"]
except KeyError:
rval2 = None

if lval != rval2:
and_list.append(AlarmElement(\
rule=AlarmTemplate(oper="!=",
operand1=Operand1(keys=\
Expand All @@ -50,9 +76,11 @@ def __call__(self, uve_key, uve_data):
operand2=Operand2(keys=["VrouterAgent","control_ip"])),
json_operand1_value=json.dumps(lval),
json_operand2_value=json.dumps(rval2)))
or_list.append(AllOf(all_of=and_list))
else:
trigger = False

if len(or_list):
if trigger:
or_list.append(AllOf(all_of=and_list))
return or_list
else:
return None
Expand All @@ -66,6 +94,9 @@ def __init__(self):

def __call__(self, uve_key, uve_data):

if "ContrailConfig" not in uve_data:
return None

try:
uattr = uve_data["ContrailConfig"]["elements"]
if isinstance(uattr,list):
Expand All @@ -74,6 +105,9 @@ def __call__(self, uve_key, uve_data):
except KeyError:
lval = None

if "BgpRouterState" not in uve_data:
return None

try:
rval = uve_data["BgpRouterState"]["bgp_router_ip_list"]
except KeyError:
Expand Down
1 change: 1 addition & 0 deletions src/opserver/plugins/alarm_config_incorrect/main.py
@@ -1,5 +1,6 @@
from opserver.plugins.alarm_base import *
from opserver.sandesh.alarmgen_ctrl.sandesh_alarm_base.ttypes import *
import json

class ConfIncorrect(AlarmBase):
def __init__(self, sev = AlarmBase.SYS_ERR):
Expand Down
4 changes: 2 additions & 2 deletions src/opserver/plugins/alarm_vrouter_interface/main.py
Expand Up @@ -11,7 +11,7 @@ def __init__(self):

def __call__(self, uve_key, uve_data):
or_list = []
if not "VrouterAgent" in uve_data:
if "VrouterAgent" in uve_data:
and_list = []
and_list.append(AlarmElement(\
rule=AlarmTemplate(oper="!=",
Expand All @@ -21,7 +21,7 @@ def __call__(self, uve_key, uve_data):

ust = uve_data["VrouterAgent"]

if not "error_intf_list" in ust:
if "error_intf_list" in ust:
and_list.append(AlarmElement(\
rule=AlarmTemplate(oper="!=",
operand1=Operand1(keys=["VrouterAgent","error_intf_list"]),
Expand Down
7 changes: 6 additions & 1 deletion src/opserver/uveserver.py
Expand Up @@ -46,10 +46,11 @@ def redis_instances(self):

def update_redis_uve_list(self, redis_uve_list):
newlist = set(redis_uve_list)

chg = False
# if some redis instances are gone, remove them from our map
for test_elem in self._redis_uve_map.keys():
if test_elem not in newlist:
chg = True
r_ip = test_elem[0]
r_port = test_elem[1]
del self._redis_uve_map[test_elem]
Expand All @@ -59,11 +60,15 @@ def update_redis_uve_list(self, redis_uve_list):
# new redis instances need to be inserted into the map
for test_elem in newlist:
if test_elem not in self._redis_uve_map:
chg = True
r_ip = test_elem[0]
r_port = test_elem[1]
self._redis_uve_map[test_elem] = None
ConnectionState.update(ConnectionType.REDIS_UVE,\
r_ip+":"+str(r_port), ConnectionStatus.INIT)
if chg:
self._logger.error("updated redis_uve_list %s" % str(self._redis_uve_map))

# Exercise redis connections to update health
if len(newlist):
self.get_uve("ObjectCollectorInfo:__NONE__", False, None)
Expand Down

0 comments on commit 61cafbf

Please sign in to comment.