diff --git a/src/nodemgr/common/cpuinfo.py b/src/nodemgr/common/cpuinfo.py
index 6616d5408e7..24efd85f020 100644
--- a/src/nodemgr/common/cpuinfo.py
+++ b/src/nodemgr/common/cpuinfo.py
@@ -11,8 +11,10 @@
 
 class MemCpuUsageData(object):
 
-    def __init__(self, pid):
+    def __init__(self, pid, last_cpu, last_time):
         self.pid = pid
+        self.last_cpu = last_cpu
+        self.last_time = last_time
         try:
             self._process = psutil.Process(self.pid)
         except psutil.NoSuchProcess:
@@ -20,8 +22,8 @@ def __init__(self, pid):
         else:
             if not hasattr(self._process, 'get_memory_info'):
                 self._process.get_memory_info = self._process.memory_info
-            if not hasattr(self._process, 'get_cpu_percent'):
-                self._process.get_cpu_percent = self._process.cpu_percent
+            if not hasattr(self._process, 'get_cpu_times'):
+                self._process.get_cpu_times = self._process.cpu_times
     #end __init__
 
     def get_num_socket(self):
@@ -68,22 +70,79 @@ def _get_cpu_load_avg(self):
         return cpu_load_avg
     #end _get_cpu_load_avg
 
-    def _get_cpu_share(self):
-        cpu_percent = self._process.get_cpu_percent(interval=0.1)
-        return cpu_percent/self.get_num_cpu()
-    #end _get_cpu_share
+    def _get_sys_cpu_share(self):
+        # Share of system-wide user+system CPU time since the previous
+        # sample, divided by the CPU count; 0 when there is no usable sample.
+        last_cpu = self.last_cpu
+        last_time = self.last_time
+
+        current_cpu = psutil.cpu_times()
+        current_time = 0.00
+        # NOTE(review): the last cpu_times field is left out of the total;
+        # confirm that is intended.
+        for i in range(0, len(current_cpu)-1):
+            current_time += current_cpu[i]
+
+        # tracking system/user time only
+        interval_time = 0
+        if last_cpu and (last_time != 0):
+            sys_time = current_cpu.system - last_cpu.system
+            usr_time = current_cpu.user - last_cpu.user
+            interval_time = current_time - last_time
+
+        self.last_cpu = current_cpu
+        self.last_time = current_time
+
+        if interval_time > 0:
+            sys_percent = 100 * sys_time / interval_time
+            usr_percent = 100 * usr_time / interval_time
+            cpu_share = round((sys_percent + usr_percent)/self.get_num_cpu(), 2)
+            return cpu_share
+        else:
+            return 0
+    #end _get_sys_cpu_share
 
     def get_sys_mem_cpu_info(self):
         sys_mem_cpu = SystemMemCpuUsage()
         sys_mem_cpu.cpu_load = self._get_cpu_load_avg()
         sys_mem_cpu.mem_info = self._get_sys_mem_info()
-        sys_mem_cpu.cpu_share = self._get_cpu_share()
+        sys_mem_cpu.cpu_share = self._get_sys_cpu_share()
         return sys_mem_cpu
     #end get_sys_mem_cpu_info
 
+    def _get_process_cpu_share(self):
+        # Share of this process's user+system CPU time over the elapsed
+        # time since the previous sample, divided by the CPU count;
+        # 0 when there is no usable previous sample.
+        last_cpu = self.last_cpu
+        last_time = self.last_time
+
+        current_cpu = self._process.get_cpu_times()
+        # os.times()[4] is elapsed real time since a fixed point in the past
+        current_time = os.times()[4]
+
+        # tracking system/user time only
+        interval_time = 0
+        if last_cpu and (last_time != 0):
+            sys_time = current_cpu.system - last_cpu.system
+            usr_time = current_cpu.user - last_cpu.user
+            interval_time = current_time - last_time
+
+        self.last_cpu = current_cpu
+        self.last_time = current_time
+
+        if interval_time > 0:
+            sys_percent = 100 * sys_time / interval_time
+            usr_percent = 100 * usr_time / interval_time
+            cpu_share = round((sys_percent + usr_percent)/self.get_num_cpu(), 2)
+            return cpu_share
+        else:
+            return 0
+    #end _get_process_cpu_share
+
     def get_process_mem_cpu_info(self):
         process_mem_cpu = ProcessCpuInfo()
-        process_mem_cpu.cpu_share = self._process.get_cpu_percent(interval=0.1)/psutil.NUM_CPUS
+        process_mem_cpu.cpu_share = self._get_process_cpu_share()
         process_mem_cpu.mem_virt = self._process.get_memory_info().vms/1024
         process_mem_cpu.mem_res = self._process.get_memory_info().rss/1024
         return process_mem_cpu
diff --git a/src/nodemgr/common/event_manager.py b/src/nodemgr/common/event_manager.py
index 29971fd0a56..3ee9908280c 100644
--- a/src/nodemgr/common/event_manager.py
+++ b/src/nodemgr/common/event_manager.py
@@ -47,6 +47,7 @@ class EventManager(object):
     rules_data = []
     group_names = []
     process_state_db = {}
+    third_party_process_state_db = {}
     FAIL_STATUS_DUMMY = 0x1
     FAIL_STATUS_DISK_SPACE = 0x2
     FAIL_STATUS_SERVER_PORT = 0x4
@@ -78,6 +79,8 @@ def __init__(self, rule_file, discovery_server,
         self.curr_build_info = None
         self.new_build_info = None
         self.send_build_info = send_build_info
+        self.last_cpu = None
+        self.last_time = 0
 
     # Get all the current processes in the node
     def get_current_process(self):
@@ -380,7 +383,7 @@ def send_nodemgr_process_status_base(self, ProcessStateNames,
             node_status_uve.send()
 
     def send_system_cpu_info(self):
-        mem_cpu_usage_data = MemCpuUsageData(os.getpid())
+        mem_cpu_usage_data = MemCpuUsageData(os.getpid(), self.last_cpu, self.last_time)
         sys_cpu = SystemCpuInfo()
         sys_cpu.num_socket = mem_cpu_usage_data.get_num_socket()
         sys_cpu.num_cpu = mem_cpu_usage_data.get_num_cpu()
@@ -393,8 +396,11 @@ def send_system_cpu_info(self):
         node_status_uve.send()
 
     def get_system_mem_cpu_usage(self):
-        system_mem_cpu_usage_data = MemCpuUsageData(os.getpid())
-        return system_mem_cpu_usage_data.get_sys_mem_cpu_info()
+        system_mem_cpu_usage_data = MemCpuUsageData(os.getpid(), self.last_cpu, self.last_time)
+        system_mem_cpu_usage = system_mem_cpu_usage_data.get_sys_mem_cpu_info()
+        self.last_cpu = system_mem_cpu_usage_data.last_cpu
+        self.last_time = system_mem_cpu_usage_data.last_time
+        return system_mem_cpu_usage
 
     def get_all_processes_mem_cpu_usage(self):
         process_mem_cpu_usage = {}
@@ -402,7 +408,7 @@ def get_all_processes_mem_cpu_usage(self):
             pstat = self.process_state_db[key]
             if (pstat.process_state == 'PROCESS_STATE_RUNNING'):
                 try:
-                    mem_cpu_usage_data = MemCpuUsageData(pstat.pid)
+                    mem_cpu_usage_data = MemCpuUsageData(pstat.pid, pstat.last_cpu, pstat.last_time)
                     process_mem_cpu = mem_cpu_usage_data.get_process_mem_cpu_info()
                 except psutil.NoSuchProcess:
                     sys.stderr.write("NoSuchProcess: process name:%s pid:%d\n"
@@ -410,6 +416,8 @@ def get_all_processes_mem_cpu_usage(self):
                 else:
                     process_mem_cpu.__key = pstat.pname
                     process_mem_cpu_usage[process_mem_cpu.__key] = process_mem_cpu
+                    pstat.last_cpu = mem_cpu_usage_data.last_cpu
+                    pstat.last_time = mem_cpu_usage_data.last_time
 
         # walk through all processes being monitored by nodemgr,
         # not spawned by supervisord
@@ -420,15 +428,25 @@ def get_all_processes_mem_cpu_usage(self):
             stdout, stderr = proc.communicate()
             if (stdout != ''):
                 pid = int(stdout.strip('\n'))
+                if pname in self.third_party_process_state_db:
+                    pstat = self.third_party_process_state_db[pname]
+                else:
+                    pstat = self.get_process_stat_object(pname)
+                    pstat.pid = pid
+                    self.third_party_process_state_db[pname] = pstat
                 try:
-                    mem_cpu_usage_data = MemCpuUsageData(pid)
+                    mem_cpu_usage_data = MemCpuUsageData(pstat.pid, pstat.last_cpu, pstat.last_time)
                     process_mem_cpu = mem_cpu_usage_data.get_process_mem_cpu_info()
                 except psutil.NoSuchProcess:
                     sys.stderr.write("NoSuchProcess: process name:%s pid:%d\n"
-                                     % (pname, pid))
+                                     % (pstat.pname, pstat.pid))
+                    # db is keyed by pname; pstat.name is the hostname
+                    self.third_party_process_state_db.pop(pname, None)
                 else:
                     process_mem_cpu.__key = pname
                     process_mem_cpu_usage[process_mem_cpu.__key] = process_mem_cpu
+                    pstat.last_cpu = mem_cpu_usage_data.last_cpu
+                    pstat.last_time = mem_cpu_usage_data.last_time
         return process_mem_cpu_usage
 
     def get_disk_usage(self):
diff --git a/src/nodemgr/common/process_stat.py b/src/nodemgr/common/process_stat.py
index 0459e23d93f..6623edfef43 100644
--- a/src/nodemgr/common/process_stat.py
+++ b/src/nodemgr/common/process_stat.py
@@ -6,7 +6,7 @@
 
 
 class ProcessStat(object):
-    def __init__(self, pname):
+    def __init__(self, pname, last_cpu=None, last_time=0):
         self.start_count = 0
         self.stop_count = 0
         self.exit_count = 0
@@ -21,3 +21,5 @@ def __init__(self, pname):
         self.name = socket.gethostname()
         self.pname = pname
         self.pid = 0
+        self.last_cpu = last_cpu
+        self.last_time = last_time