From 981d6cf6992e0721e07a7fe7d47df892e1e03338 Mon Sep 17 00:00:00 2001 From: Raj Reddy Date: Thu, 12 May 2016 15:23:08 -0700 Subject: [PATCH] Closes-Bug: #1570173 We need to periodically look at the core file directory and update the core_file_list that gets sent in the NodeStatus UVE (cherry picked from commit 141a4e0cf59166d0744e34f74526108012864c1a) Conflicts: src/nodemgr/common/event_manager.py Change-Id: I0cdacfa9c3194ba376a8c7f432bf907e1320381e --- src/nodemgr/common/event_manager.py | 60 +++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/src/nodemgr/common/event_manager.py b/src/nodemgr/common/event_manager.py index c8d7fcaec13..d820cced6c0 100644 --- a/src/nodemgr/common/event_manager.py +++ b/src/nodemgr/common/event_manager.py @@ -150,6 +150,40 @@ def _add_build_info(self, node_status): self.curr_build_info = self.new_build_info node_status.build_info = self.curr_build_info + def update_process_core_file_list(self): + #LOG_DEBUG sys.stderr.write('update_process_core_file_list: begin:') + ret_value = False + try: + ls_command = "ls -1 /var/crashes" + (corenames, stderr) = Popen( + ls_command.split(), + stdout=PIPE).communicate() + + process_state_db_tmp = {} + for key in self.process_state_db: + #LOG_DEBUG sys.stderr.write('update_process_core_file_list: key: '+key+'\n') + proc_stat = self.get_process_stat_object(key) + process_state_db_tmp[key] = proc_stat + + #LOG_DEBUG sys.stderr.write('update_process_core_file_list: corenames: '+corenames+'\n') + for corename in corenames.split(): + exec_name = corename.split('.')[1] + for key in self.process_state_db: + if key.startswith(exec_name): + #LOG_DEBUG sys.stderr.write('update_process_core_file_list: startswith: '+exec_name+'\n') + process_state_db_tmp[key].core_file_list.append(corename.rstrip()) + + for key in self.process_state_db: + if set(process_state_db_tmp[key].core_file_list) != set(self.process_state_db[key].core_file_list): + self.process_state_db[key].core_file_list = process_state_db_tmp[key].core_file_list + ret_value = True + except Exception as e: + sys.stderr.write('update_process_core_file_list: exception: '+str(e)) + + #LOG_DEBUG sys.stderr.write('update_process_core_file_list: ret_value: '+str(ret_value)+'\n') + return ret_value + #end update_process_core_file_list + def send_process_state_db_base(self, group_names, ProcessInfo, NodeStatus, NodeStatusUVE): name = socket.gethostname() @@ -171,7 +205,6 @@ def send_process_state_db_base(self, group_names, ProcessInfo, process_info.last_exit_time = pstat.exit_time process_info.core_file_list = pstat.core_file_list process_infos.append(process_info) - name = pstat.name if pstat.deleted == False: delete_status = False @@ -180,25 +213,24 @@ def send_process_state_db_base(self, group_names, ProcessInfo, # send node UVE node_status = NodeStatus() - node_status.name = name + node_status.name = socket.gethostname() node_status.deleted = delete_status node_status.process_info = process_infos - node_status.all_core_file_list = self.all_core_file_list if (self.send_build_info): self._add_build_info(node_status) node_status_uve = NodeStatusUVE(data=node_status) - msg = 'Sending UVE:' + str(node_status_uve) + msg = 'send_process_state_db_base: Sending UVE:' + str(node_status_uve) self.sandesh_global.logger().log(SandeshLogger.get_py_logger_level( SandeshLevel.SYS_INFO), msg) node_status_uve.send() - def send_all_core_file(self): + def update_all_core_file(self): stat_command_option = "stat --printf=%Y /var/crashes" modified_time = Popen( stat_command_option.split(), stdout=PIPE).communicate() if modified_time[0] == self.core_dir_modified_time: - return + return False self.core_dir_modified_time = modified_time[0] ls_command_option = "ls /var/crashes" (corename, stderr) = Popen( @@ -206,6 +238,7 @@ def send_all_core_file(self): stdout=PIPE).communicate() self.all_core_file_list = corename.split('\n')[0:-1] self.send_process_state_db(self.group_names) + return True def get_process_stat_object(self, pname): return ProcessStat(pname) @@ -318,9 +351,9 @@ def send_nodemgr_process_status_base(self, ProcessStateNames, if (self.send_build_info): self._add_build_info(node_status) node_status_uve = NodeStatusUVE(data=node_status) - msg = 'Sending UVE:' + str(node_status_uve) - self.sandesh_global.logger().log(SandeshLogger.get_py_logger_level( - SandeshLevel.SYS_INFO), msg) + msg = 'send_nodemgr_process_status_base: Sending UVE:' + str(node_status_uve) + self.sandesh_global.logger().log(SandeshLogger.get_py_logger_level( + SandeshLevel.SYS_INFO), msg) node_status_uve.send() def send_disk_usage_info_base(self, NodeStatusUVE, NodeStatus, @@ -352,10 +385,13 @@ def send_disk_usage_info_base(self, NodeStatusUVE, NodeStatus, # send node UVE node_status = NodeStatus( name=socket.gethostname(), disk_usage_info=disk_usage_infos) + # send other core file + if self.update_all_core_file(): + node_status.all_core_file_list = self.all_core_file_list if (self.send_build_info): self._add_build_info(node_status) node_status_uve = NodeStatusUVE(data=node_status) - msg = 'Sending UVE:' + str(node_status_uve) + msg = 'send_disk_usage_info_base: Sending UVE:' + str(node_status_uve) self.sandesh_global.logger().log(SandeshLogger.get_py_logger_level( SandeshLevel.SYS_INFO), msg) node_status_uve.send() @@ -429,14 +465,14 @@ def event_process_communication(self, pdata): def event_tick_60(self, prev_current_time): self.tick_count += 1 - # send other core file - self.send_all_core_file() # send disk usage info periodically self.send_disk_usage_info() # typical ntp sync time is about 5 min - first time, # we scan only after 10 min if self.tick_count >= 10: self.check_ntp_status() + if self.update_process_core_file_list(): + self.send_process_state_db(['default']) current_time = int(time.time()) if ((abs(current_time - prev_current_time)) > 300):