diff --git a/src/analytics/analytics.sandesh b/src/analytics/analytics.sandesh
index af0f7605e28..435a2a77244 100644
--- a/src/analytics/analytics.sandesh
+++ b/src/analytics/analytics.sandesh
@@ -76,6 +76,9 @@ struct NodeStatus {
     6: optional list disk_usage_info (tags="")
     7: optional string description
     8: optional list all_core_file_list
+    10: optional list<cpuinfo.ProcessCpuInfo> process_mem_cpu_usage (aggtype="union")
+    11: optional cpuinfo.SystemMemCpuUsage system_mem_cpu_usage
+    12: optional cpuinfo.SystemCpuInfo system_cpu_info
 }
 
 /**
diff --git a/src/analytics/database/SConscript b/src/analytics/database/SConscript
index 70a3bb13b7a..a8c70825e2f 100755
--- a/src/analytics/database/SConscript
+++ b/src/analytics/database/SConscript
@@ -26,10 +26,12 @@ for file in local_sources:
     local_sources_rules.append(DatabaseEnv.Install("database", file))
 
 database_pkg = DatabaseEnv.SandeshGenPy('#controller/src/analytics/database/database.sandesh', 'database/sandesh/', False)
+cpuinfo_pkg = DatabaseEnv.SandeshGenPy('#controller/src/base/sandesh/cpuinfo.sandesh', 'database/sandesh/database/', False)
+DatabaseEnv.Depends(cpuinfo_pkg, database_pkg)
 process_info_pkg = DatabaseEnv.SandeshGenPy('#/controller/src/base/sandesh/process_info.sandesh', 'database/sandesh/database/', False)
 DatabaseEnv.Depends(process_info_pkg, database_pkg)
 
-sdist_depends = [setup_sources_rules, local_sources_rules, database_pkg, process_info_pkg]
+sdist_depends = [setup_sources_rules, local_sources_rules, database_pkg, cpuinfo_pkg, process_info_pkg]
 sdist_gen = DatabaseEnv.Command('dist', 'setup.py', 'cd ' + Dir('.').path + ' && python setup.py sdist')
 DatabaseEnv.Depends(sdist_gen, sdist_depends)
 
diff --git a/src/analytics/database/database.sandesh b/src/analytics/database/database.sandesh
index 08beceb82f5..935e56e1a2e 100755
--- a/src/analytics/database/database.sandesh
+++ b/src/analytics/database/database.sandesh
@@ -7,6 +7,7 @@
  * Database Node UVE.
  */
 
+include "base/sandesh/cpuinfo.sandesh"
 include "base/sandesh/process_info.sandesh"
 
 /**
@@ -53,6 +54,9 @@ struct NodeStatus {
     7: optional string description
     8: optional list all_core_file_list
     9: optional string build_info
+    10: optional list<cpuinfo.ProcessCpuInfo> process_mem_cpu_usage (aggtype="union")
+    11: optional cpuinfo.SystemMemCpuUsage system_mem_cpu_usage
+    12: optional cpuinfo.SystemCpuInfo system_cpu_info
 }
 
 /**
diff --git a/src/analytics/database/setup.py b/src/analytics/database/setup.py
index 4c9ea2b4878..d45fefbc12e 100755
--- a/src/analytics/database/setup.py
+++ b/src/analytics/database/setup.py
@@ -10,7 +10,8 @@
     packages=['database',
               'database.sandesh',
               'database.sandesh.database',
-              'database.sandesh.database.process_info'
+              'database.sandesh.database.process_info',
+              'database.sandesh.database.cpuinfo'
               ],
     package_data={'': ['*.html', '*.css', '*.xml']},
     zip_safe=False,
diff --git a/src/base/cpuinfo.cc b/src/base/cpuinfo.cc
index b3f8d69a863..d5606e31f79 100644
--- a/src/base/cpuinfo.cc
+++ b/src/base/cpuinfo.cc
@@ -109,7 +109,9 @@ static void SystemMemInfo(SystemMemInfo &info) {
     // MemFree: 90333184 kB
     file >> tmp; file >> info.free; file >> tmp;
     // Buffers: 1029924 kB
-    file >> tmp; file >> info.buffers;
+    file >> tmp; file >> info.buffers; file >> tmp;
+    // Cached: 10290012 kB
+    file >> tmp; file >> info.cached;
     // Used = Total - Free
     info.used = info.total - info.free;
 }
@@ -179,6 +181,7 @@ void CpuLoadData::FillCpuInfo(CpuLoadInfo &cpu_load_info, bool system) {
         sys_mem_info.set_used(info.sys_mem_info.used);
         sys_mem_info.set_free(info.sys_mem_info.free);
         sys_mem_info.set_buffers(info.sys_mem_info.buffers);
+        sys_mem_info.set_cached(info.sys_mem_info.cached);
         cpu_load_info.set_sys_mem_info(sys_mem_info);
     }
 }
diff --git a/src/base/cpuinfo.h b/src/base/cpuinfo.h
index 297f4f456bd..a3f6b32ce79 100644
--- a/src/base/cpuinfo.h
+++ b/src/base/cpuinfo.h
@@ -29,6 +29,7 @@ struct SystemMemInfo {
     uint32_t used;
     uint32_t free;
     uint32_t buffers;
+    uint32_t cached;
 };
 
 struct CpuInfo {
diff --git a/src/base/sandesh/cpuinfo.sandesh b/src/base/sandesh/cpuinfo.sandesh
index 8154f375c99..e0dcad713e6 100644
--- a/src/base/sandesh/cpuinfo.sandesh
+++ b/src/base/sandesh/cpuinfo.sandesh
@@ -27,6 +27,7 @@ struct SysMemInfo {
     2: u32 used;
     3: u32 free;
     4: u32 buffers;
+    5: u32 cached;
 }
 
 /**
@@ -57,3 +58,22 @@ struct ProcessCpuInfo {
     4: double cpu_share
     5: u32 mem_res
 }
+
+/**
+ * This structure carries system cpu topology information
+ */
+struct SystemCpuInfo {
+    1: u32 num_socket;
+    2: u32 num_cpu;
+    3: u32 num_core_per_socket;
+    4: u32 num_thread_per_core;
+}
+
+/**
+ * This structure carries system memory and cpu information
+ */
+struct SystemMemCpuUsage {
+    1: SysMemInfo mem_info;
+    2: CpuLoadAvg cpu_load;
+    3: double cpu_share;
+}
diff --git a/src/config/common/vnc_cpu_info.py b/src/config/common/vnc_cpu_info.py
index f8b8e392762..6e8369ceb7a 100644
--- a/src/config/common/vnc_cpu_info.py
+++ b/src/config/common/vnc_cpu_info.py
@@ -127,6 +127,7 @@ def _send_cpustats(self):
         mod_cpu.cpu_info.sys_mem_info.used = self._virtmem_info.used / 1024
         mod_cpu.cpu_info.sys_mem_info.free = self._virtmem_info.free / 1024
         mod_cpu.cpu_info.sys_mem_info.buffers = self._virtmem_info.buffers / 1024
+        mod_cpu.cpu_info.sys_mem_info.cached = self._virtmem_info.cached / 1024
 
         # populate CPU Load avg
         mod_cpu.cpu_info.cpuload = CpuLoadAvg()
diff --git a/src/config/uve/cfgm_cpuinfo.sandesh b/src/config/uve/cfgm_cpuinfo.sandesh
index 718fe952052..fc2b9d5d644 100644
--- a/src/config/uve/cfgm_cpuinfo.sandesh
+++ b/src/config/uve/cfgm_cpuinfo.sandesh
@@ -52,6 +52,9 @@ struct NodeStatus {
     6: optional list disk_usage_info (tags="")
     7: optional string description
     8: optional list all_core_file_list
+    10: optional list<cpuinfo.ProcessCpuInfo> process_mem_cpu_usage (aggtype="union")
+    11: optional cpuinfo.SystemMemCpuUsage system_mem_cpu_usage
+    12: optional cpuinfo.SystemCpuInfo system_cpu_info
 }
 
 uve sandesh NodeStatusUVE {
diff --git a/src/control-node/sandesh/control_node.sandesh b/src/control-node/sandesh/control_node.sandesh
index 4055a7b2367..d12aab54e8f 100644
--- a/src/control-node/sandesh/control_node.sandesh
+++ b/src/control-node/sandesh/control_node.sandesh
@@ -86,6 +86,9 @@ struct NodeStatus {
     6: optional list disk_usage_info (tags="")
     7: optional string description
     8: optional list all_core_file_list
+    10: optional list<cpuinfo.ProcessCpuInfo> process_mem_cpu_usage (aggtype="union")
+    11: optional cpuinfo.SystemMemCpuUsage system_mem_cpu_usage
+    12: optional cpuinfo.SystemCpuInfo system_cpu_info
 }
 
 /**
diff --git a/src/nodemgr/SConscript b/src/nodemgr/SConscript
index 8edcdbfe63b..fb379a510c0 100644
--- a/src/nodemgr/SConscript
+++ b/src/nodemgr/SConscript
@@ -76,11 +76,15 @@ common_sources = [
     'common/event_manager.py',
     'common/process_stat.py',
     'common/event_listener_protocol_nodemgr.py',
+    'common/cpuinfo.py',
 ]
 common_sources_rules = []
 for file in common_sources:
     common_sources_rules.append(OpEnv.Install(Dir("nodemgr/common"), file))
 
+cpu_info_pkg = OpEnv.SandeshGenPy('#controller/src/base/sandesh/cpuinfo.sandesh', 'nodemgr/common/sandesh/', False)
+
+
 rel_path = Dir('nodemgr/common').path
 def BuildInfoAction(target, source, env):
     env.GenerateBuildInfoPyCode(path=rel_path)
@@ -89,7 +93,8 @@ build_info_rules = [OpEnv.Command(target='buildinfo.py', source = None, action=B
 sdist_depends = [setup_sources_rules, local_sources_rules,
                  analytics_sources_rules, control_sources_rules,
                  config_sources_rules, common_sources_rules,
-                 vrouter_sources_rules, database_sources_rules, build_info_rules
+                 vrouter_sources_rules, database_sources_rules, build_info_rules,
+                 cpu_info_pkg
                 ]
 
 cd_cmd = 'cd ' + Dir('.').path + ' && '
diff --git a/src/nodemgr/analytics_nodemgr/analytics_event_manager.py b/src/nodemgr/analytics_nodemgr/analytics_event_manager.py
index 1f6b7a9573e..b50f6995a5f 100644
--- a/src/nodemgr/analytics_nodemgr/analytics_event_manager.py
+++ b/src/nodemgr/analytics_nodemgr/analytics_event_manager.py
@@ -27,7 +27,7 @@
     NodeStatusUVE, NodeStatus
 from pysandesh.connection_info import ConnectionState
 from analytics.process_info.ttypes import \
-    ProcessStatus, ProcessState, ProcessInfo, DiskPartitionUsageStats
+    ProcessStatus, ProcessState, ProcessInfo
 from analytics.process_info.constants import \
     ProcessStateNames
 
@@ -55,6 +55,7 @@ def __init__(self, rule_file, discovery_server,
             self.instance_id,
             staticmethod(ConnectionState.get_process_state_cb),
             NodeStatusUVE, NodeStatus)
+        self.send_system_cpu_info()
     # end __init__
 
     def process(self):
@@ -73,10 +74,12 @@ def send_nodemgr_process_status(self):
             ProcessStateNames, ProcessState, ProcessStatus,
             NodeStatus, NodeStatusUVE)
 
+    def get_node_status_class(self):
+        return NodeStatus
+
+    def get_node_status_uve_class(self):
+        return NodeStatusUVE
+
     def get_process_state(self, fail_status_bits):
         return self.get_process_state_base(
             fail_status_bits, ProcessStateNames, ProcessState)
-
-    def send_disk_usage_info(self):
-        self.send_disk_usage_info_base(
-            NodeStatusUVE, NodeStatus, DiskPartitionUsageStats)
diff --git a/src/nodemgr/common/cpuinfo.py b/src/nodemgr/common/cpuinfo.py
new file mode 100644
index 00000000000..bcd7edd9bd4
--- /dev/null
+++ b/src/nodemgr/common/cpuinfo.py
@@ -0,0 +1,108 @@
+#
+# Copyright (c) 2016 Juniper Networks, Inc. All rights reserved.
+#
+
+import os
+import psutil
+import subprocess
+
+from subprocess import Popen, PIPE
+from sandesh.cpuinfo.ttypes import *
+
+class MemCpuUsageData(object):
+    """Collects cpu topology and memory/cpu usage data for process `pid`."""
+
+    def __init__(self, pid):
+        self.pid = pid
+        try:
+            self._process = psutil.Process(self.pid)
+        except psutil.NoSuchProcess:
+            raise
+        else:
+            # some psutil versions only provide memory_info/cpu_percent;
+            # alias them so the rest of the class can use the get_* names
+            if not hasattr(self._process, 'get_memory_info'):
+                self._process.get_memory_info = self._process.memory_info
+            if not hasattr(self._process, 'get_cpu_percent'):
+                self._process.get_cpu_percent = self._process.cpu_percent
+    #end __init__
+
+    # The get_num_* helpers below each extract one field of `lscpu` output
+    def get_num_socket(self):
+        cmd = 'lscpu | grep "Socket(s):" | awk \'{print $2}\''
+        proc = Popen(cmd, shell=True, stdout=PIPE)
+        return int(proc.communicate()[0])
+    #end get_num_socket
+
+    def get_num_cpu(self):
+        cmd = 'lscpu | grep "^CPU(s):" | awk \'{print $2}\''
+        proc = Popen(cmd, shell=True, stdout=PIPE)
+        return int(proc.communicate()[0])
+    #end get_num_cpu
+
+    def get_num_core_per_socket(self):
+        cmd = 'lscpu | grep "Core(s) per socket:" | awk \'{print $4}\''
+        proc = Popen(cmd, shell=True, stdout=PIPE)
+        return int(proc.communicate()[0])
+    #end get_num_core_per_socket
+
+    def get_num_thread_per_core (self):
+        cmd = 'lscpu | grep "Thread(s) per core:" | awk \'{print $4}\''
+        proc = Popen(cmd, shell=True, stdout=PIPE)
+        return int(proc.communicate()[0])
+    #end get_num_thread_per_core
+
+    def _get_sys_mem_info(self):
+        # psutil reports bytes; /1024 converts every counter to kB
+        virtmem_info = psutil.virtual_memory()
+        sys_mem_info = SysMemInfo()
+        sys_mem_info.total = virtmem_info.total/1024
+        sys_mem_info.used = virtmem_info.used/1024
+        sys_mem_info.free = virtmem_info.free/1024
+        sys_mem_info.buffers = virtmem_info.buffers/1024
+        sys_mem_info.cached = virtmem_info.cached/1024
+        return sys_mem_info
+    #end _get_sys_mem_info
+
+    def _get_cpu_load_avg(self):
+        load_avg = os.getloadavg()
+        cpu_load_avg = CpuLoadAvg()
+        cpu_load_avg.one_min_avg = load_avg[0]
+        cpu_load_avg.five_min_avg = load_avg[1]
+        cpu_load_avg.fifteen_min_avg = load_avg[2]
+        return cpu_load_avg
+    #end _get_cpu_load_avg
+
+    def _get_cpu_share(self):
+        cpu_percent = self._process.get_cpu_percent(interval=0.1)
+        return cpu_percent/self.get_num_cpu()
+    #end _get_cpu_share
+
+    def _get_psutil_num_cpu(self):
+        # psutil.NUM_CPUS is missing from newer psutil; prefer cpu_count()
+        if hasattr(psutil, 'cpu_count'):
+            return psutil.cpu_count()
+        return psutil.NUM_CPUS
+    #end _get_psutil_num_cpu
+
+    def get_sys_mem_cpu_info(self):
+        sys_mem_cpu = SystemMemCpuUsage()
+        sys_mem_cpu.cpu_load = self._get_cpu_load_avg()
+        sys_mem_cpu.mem_info = self._get_sys_mem_info()
+        # NOTE(review): cpu_share is sampled from self._process (see
+        # _get_cpu_share), not system wide - confirm this is intended
+        sys_mem_cpu.cpu_share = self._get_cpu_share()
+        return sys_mem_cpu
+    #end get_sys_mem_cpu_info
+
+    def get_process_mem_cpu_info(self):
+        process_mem_cpu = ProcessCpuInfo()
+        num_cpu = self._get_psutil_num_cpu()
+        process_mem_cpu.cpu_share = self._process.get_cpu_percent(interval=0.1)/num_cpu
+        # read the memory counters once so vms and rss belong together
+        mem_info = self._process.get_memory_info()
+        process_mem_cpu.mem_virt = mem_info.vms/1024
+        process_mem_cpu.mem_res = mem_info.rss/1024
+        return process_mem_cpu
+    #end get_process_mem_cpu_info
+#end class MemCpuUsageData
diff --git a/src/nodemgr/common/event_manager.py b/src/nodemgr/common/event_manager.py
index 6ff41b22c46..5f8cfe16bce 100644
--- a/src/nodemgr/common/event_manager.py
+++ b/src/nodemgr/common/event_manager.py
@@ -9,6 +9,7 @@
 from ConfigParser import NoOptionError, NoSectionError
 import sys
 import os
+import psutil
 import socket
 import time
 import subprocess
@@ -20,12 +21,14 @@
 from nodemgr.common.event_listener_protocol_nodemgr import \
     EventListenerProtocolNodeMgr
 from nodemgr.common.process_stat import ProcessStat
+from nodemgr.common.sandesh.cpuinfo.ttypes import *
+from nodemgr.common.cpuinfo import MemCpuUsageData
 from sandesh_common.vns.constants import INSTANCE_ID_DEFAULT
 import discoveryclient.client as client
 from buildinfo import build_info
 from pysandesh.sandesh_logger import *
 from pysandesh.gen_py.sandesh.ttypes import SandeshLevel
-
+from analytics.process_info.ttypes import DiskPartitionUsageStats
 
 class EventManager(object):
     rules_data = []
@@ -71,11 +74,14 @@ def get_current_process(self):
             None, None, serverurl=self.supervisor_serverurl))
         # Add all current processes to make sure nothing misses the radar
         process_state_db = {}
+        # list of all processes on the node is made here
         for proc_info in proxy.supervisor.getAllProcessInfo():
             if (proc_info['name'] != proc_info['group']):
                 proc_name = proc_info['group'] + ":" + proc_info['name']
             else:
                 proc_name = proc_info['name']
+            proc_pid = proc_info['pid']
+
             process_stat_ent = self.get_process_stat_object(proc_name)
             process_stat_ent.process_state = "PROCESS_STATE_" + \
                 proc_info['statename']
@@ -83,6 +89,7 @@ def get_current_process(self):
                     'PROCESS_STATE_RUNNING'):
                 process_stat_ent.start_time = str(proc_info['start'] * 1000000)
                 process_stat_ent.start_count += 1
+            process_stat_ent.pid = proc_pid
             process_state_db[proc_name] = process_stat_ent
         return process_state_db
     # end get_current_process
@@ -137,7 +144,7 @@ def check_ntp_status(self):
             self.fail_status_bits &= ~self.FAIL_STATUS_NTP_SYNC
         self.send_nodemgr_process_status()
 
-    def _add_build_info(self, node_status):
+    def get_build_info(self):
         # Retrieve build_info from package/rpm and cache it
         if self.curr_build_info is None:
             command = "contrail-version contrail-nodemgr | grep contrail-nodemgr"
@@ -148,7 +155,7 @@ def _add_build_info(self, node_status):
                 build_num + '"}]}'
             if (self.new_build_info != self.curr_build_info):
                 self.curr_build_info = self.new_build_info
-        node_status.build_info = self.curr_build_info
+        return self.curr_build_info
 
     def update_process_core_file_list(self):
         #LOG_DEBUG sys.stderr.write('update_process_core_file_list: begin:')
@@ -217,7 +224,7 @@ def send_process_state_db_base(self, group_names, ProcessInfo,
             node_status.deleted = delete_status
             node_status.process_info = process_infos
             if (self.send_build_info):
-                self._add_build_info(node_status)
+                node_status.build_info = self.get_build_info()
             node_status_uve = NodeStatusUVE(data=node_status)
             msg = 'send_process_state_db_base: Sending UVE:' + str(node_status_uve)
             self.sandesh_global.logger().log(SandeshLogger.get_py_logger_level(
@@ -259,6 +266,7 @@ def send_process_state(self, pname, pstate, pheaders):
             proc_stat.start_count += 1
             proc_stat.start_time = str(int(time.time() * 1000000))
             send_uve = True
+            proc_stat.pid = int(pheaders['pid'])
 
         if (pstate == 'PROCESS_STATE_STOPPED'):
             proc_stat.stop_count += 1
@@ -349,19 +357,61 @@ def send_nodemgr_process_status_base(self, ProcessStateNames,
         node_status = NodeStatus(name=socket.gethostname(),
                         process_status=process_status_list)
         if (self.send_build_info):
-            self._add_build_info(node_status)
+            node_status.build_info = self.get_build_info()
         node_status_uve = NodeStatusUVE(data=node_status)
         msg = 'send_nodemgr_process_status_base: Sending UVE:' + str(node_status_uve)
         self.sandesh_global.logger().log(SandeshLogger.get_py_logger_level(
                                 SandeshLevel.SYS_INFO), msg)
         node_status_uve.send()
 
-    def send_disk_usage_info_base(self, NodeStatusUVE, NodeStatus,
-                                  DiskPartitionUsageStats):
+    def send_system_cpu_info(self):
+        # Publish the cpu topology of this node (sockets, cpus, cores per
+        # socket, threads per core) in a NodeStatus UVE.
+        mem_cpu_usage_data = MemCpuUsageData(os.getpid())
+        sys_cpu = SystemCpuInfo()
+        sys_cpu.num_socket = mem_cpu_usage_data.get_num_socket()
+        sys_cpu.num_cpu = mem_cpu_usage_data.get_num_cpu()
+        sys_cpu.num_core_per_socket = mem_cpu_usage_data.get_num_core_per_socket()
+        sys_cpu.num_thread_per_core = mem_cpu_usage_data.get_num_thread_per_core()
+        NodeStatus = self.get_node_status_class()
+        NodeStatusUVE = self.get_node_status_uve_class()
+        node_status = NodeStatus(name=socket.gethostname(),
+                                 system_cpu_info=sys_cpu)
+        node_status_uve = NodeStatusUVE(data=node_status)
+        node_status_uve.send()
+
+    def get_system_mem_cpu_usage(self):
+        # Return the SystemMemCpuUsage built by a MemCpuUsageData that is
+        # bound to the pid of this (nodemgr) process.
+        system_mem_cpu_usage_data = MemCpuUsageData(os.getpid())
+        return system_mem_cpu_usage_data.get_sys_mem_cpu_info()
+
+    def get_all_processes_mem_cpu_usage(self):
+        # Return a list of ProcessCpuInfo, one per process_state_db entry
+        # that is in PROCESS_STATE_RUNNING and could be sampled.
+        process_mem_cpu_usage = []
+        for key in self.process_state_db:
+            pstat = self.process_state_db[key]
+            if (pstat.process_state == 'PROCESS_STATE_RUNNING'):
+                try:
+                    mem_cpu_usage_data = MemCpuUsageData(pstat.pid)
+                    # the process can also exit while it is being sampled,
+                    # so the sampling is covered by the same handler
+                    process_mem_cpu = mem_cpu_usage_data.get_process_mem_cpu_info()
+                except psutil.NoSuchProcess:
+                    sys.stderr.write("NoSuchProcess: process name:%s pid:%d\n"
+                                     % (pstat.pname, pstat.pid))
+                else:
+                    process_mem_cpu.module_id = pstat.pname
+                    process_mem_cpu.inst_id = "0"    # ??
+                    process_mem_cpu_usage.append(process_mem_cpu)
+        return process_mem_cpu_usage
+
+    def get_disk_usage(self):
+        disk_usage_info = []
         partition = subprocess.Popen(
             "df -T -t ext2 -t ext3 -t ext4 -t xfs",
             shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        disk_usage_infos = []
         for line in partition.stdout:
             if 'Filesystem' in line:
                 continue
@@ -386,22 +436,9 @@ def send_disk_usage_info_base(self, NodeStatusUVE, NodeStatus,
                 except ValueError:
                     sys.stderr.write("Failed to get local disk space usage" + "\n")
                 else:
-                    disk_usage_infos.append(disk_usage_stat)
-
-        # send node UVE
-        node_status = NodeStatus(
-            name=socket.gethostname(), disk_usage_info=disk_usage_infos)
-        # send other core file
-        if self.update_all_core_file():
-            node_status.all_core_file_list = self.all_core_file_list
-        if (self.send_build_info):
-            self._add_build_info(node_status)
-        node_status_uve = NodeStatusUVE(data=node_status)
-        msg = 'send_disk_usage_info_base: Sending UVE:' + str(node_status_uve)
-        self.sandesh_global.logger().log(SandeshLogger.get_py_logger_level(
-                                SandeshLevel.SYS_INFO), msg)
-        node_status_uve.send()
-    # end send_disk_usage_info
+                    disk_usage_info.append(disk_usage_stat)
+        return disk_usage_info
+    # end get_disk_usage
 
     def get_process_state_base(self, fail_status_bits,
                                ProcessStateNames, ProcessState):
@@ -471,8 +508,9 @@ def event_process_communication(self, pdata):
 
     def event_tick_60(self, prev_current_time):
         self.tick_count += 1
-        # send disk usage info periodically
-        self.send_disk_usage_info()
+        # get disk usage info periodically
+        disk_usage_info = self.get_disk_usage()
+
         # typical ntp sync time is about 5 min - first time,
         # we scan only after 10 min
         if self.tick_count >= 10:
@@ -480,6 +518,27 @@ def event_tick_60(self, prev_current_time):
         if self.update_process_core_file_list():
             self.send_process_state_db(['default'])
 
+        # get system mem/cpu usage
+        system_mem_cpu_usage = self.get_system_mem_cpu_usage()
+
+        # get processes mem/cpu usage
+        process_mem_cpu_usage = self.get_all_processes_mem_cpu_usage()
+
+        # send above encoded buffer
+        NodeStatus = self.get_node_status_class()
+        NodeStatusUVE = self.get_node_status_uve_class()
+        node_status = NodeStatus(name=socket.gethostname(),
+                                 disk_usage_info=disk_usage_info,
+                                 system_mem_cpu_usage=system_mem_cpu_usage,
+                                 process_mem_cpu_usage=process_mem_cpu_usage)
+        # encode other core file
+        if self.update_all_core_file():
+            node_status.all_core_file_list = self.all_core_file_list
+        if (self.send_build_info):
+            node_status.build_info = self.get_build_info()
+        node_status_uve = NodeStatusUVE(data=node_status)
+        node_status_uve.send()
+
         current_time = int(time.time())
         if ((abs(current_time - prev_current_time)) > 300):
             # update all process start_times with the updated time
diff --git a/src/nodemgr/common/process_stat.py b/src/nodemgr/common/process_stat.py
index e651bfd06e3..0459e23d93f 100644
--- a/src/nodemgr/common/process_stat.py
+++ b/src/nodemgr/common/process_stat.py
@@ -19,3 +19,5 @@ def __init__(self, pname):
         self.process_state = 'PROCESS_STATE_STOPPED'
         self.group = 'default'
         self.name = socket.gethostname()
+        self.pname = pname
+        self.pid = 0
diff --git a/src/nodemgr/config_nodemgr/config_event_manager.py b/src/nodemgr/config_nodemgr/config_event_manager.py
index 1f1eb9b5e88..93d456472b0 100644
--- a/src/nodemgr/config_nodemgr/config_event_manager.py
+++ b/src/nodemgr/config_nodemgr/config_event_manager.py
@@ -35,7 +35,7 @@
     NodeStatusUVE, NodeStatus
 from pysandesh.connection_info import ConnectionState
 from cfgm_common.uve.cfgm_cpuinfo.process_info.ttypes import \
-    ProcessStatus, ProcessState, ProcessInfo, DiskPartitionUsageStats
+    ProcessStatus, ProcessState, ProcessInfo
 from cfgm_common.uve.cfgm_cpuinfo.process_info.constants import \
     ProcessStateNames
 
@@ -64,6 +64,7 @@ def __init__(self, rule_file, discovery_server,
             self.module_id, self.instance_id,
             staticmethod(ConnectionState.get_process_state_cb),
             NodeStatusUVE, NodeStatus)
+        self.send_system_cpu_info()
     # end __init__
 
     def process(self):
@@ -82,10 +83,12 @@ def send_nodemgr_process_status(self):
             ProcessStateNames, ProcessState, ProcessStatus,
             NodeStatus, NodeStatusUVE)
 
+    def get_node_status_class(self):
+        return NodeStatus
+
+    def get_node_status_uve_class(self):
+        return NodeStatusUVE
+
     def get_process_state(self, fail_status_bits):
         return self.get_process_state_base(
             fail_status_bits, ProcessStateNames, ProcessState)
-
-    def send_disk_usage_info(self):
-        self.send_disk_usage_info_base(
-            NodeStatusUVE, NodeStatus, DiskPartitionUsageStats)
diff --git a/src/nodemgr/control_nodemgr/control_event_manager.py b/src/nodemgr/control_nodemgr/control_event_manager.py
index f6f3c74e957..2748227b6bc 100644
--- a/src/nodemgr/control_nodemgr/control_event_manager.py
+++ b/src/nodemgr/control_nodemgr/control_event_manager.py
@@ -35,7 +35,7 @@
 from control_node.control_node.ttypes \
     import NodeStatusUVE, NodeStatus
 from control_node.control_node.process_info.ttypes \
-    import ProcessStatus, ProcessState, ProcessInfo, DiskPartitionUsageStats
+    import ProcessStatus, ProcessState, ProcessInfo
 from control_node.control_node.process_info.constants import \
     ProcessStateNames
 
@@ -64,6 +64,7 @@ def __init__(self, rule_file, discovery_server,
             self.instance_id,
             staticmethod(ConnectionState.get_process_state_cb),
             NodeStatusUVE, NodeStatus)
+        self.send_system_cpu_info()
     # end __init__
 
     def process(self):
@@ -82,10 +83,12 @@ def send_nodemgr_process_status(self):
             ProcessStateNames, ProcessState, ProcessStatus,
             NodeStatus, NodeStatusUVE)
 
+    def get_node_status_class(self):
+        return NodeStatus
+
+    def get_node_status_uve_class(self):
+        return NodeStatusUVE
+
     def get_process_state(self, fail_status_bits):
         return self.get_process_state_base(
             fail_status_bits, ProcessStateNames, ProcessState)
-
-    def send_disk_usage_info(self):
-        self.send_disk_usage_info_base(
-            NodeStatusUVE, NodeStatus, DiskPartitionUsageStats)
diff --git a/src/nodemgr/database_nodemgr/database_event_manager.py b/src/nodemgr/database_nodemgr/database_event_manager.py
index aacea7a17bc..c0162f2277e 100644
--- a/src/nodemgr/database_nodemgr/database_event_manager.py
+++ b/src/nodemgr/database_nodemgr/database_event_manager.py
@@ -40,7 +40,7 @@
     DatabaseUsageInfo, DatabaseUsage
 from pysandesh.connection_info import ConnectionState
 from database.sandesh.database.process_info.ttypes import \
-    ProcessStatus, ProcessState, ProcessInfo, DiskPartitionUsageStats
+    ProcessStatus, ProcessState, ProcessInfo
 from database.sandesh.database.process_info.constants import \
     ProcessStateNames
 
@@ -83,6 +83,7 @@ def __init__(self, rule_file, discovery_server,
             self.instance_id,
             staticmethod(ConnectionState.get_process_state_cb),
             NodeStatusUVE, NodeStatus)
+        self.send_system_cpu_info()
     # end __init__
 
     def _get_cassandra_config_option(self, config):
@@ -161,6 +162,12 @@ def send_nodemgr_process_status(self):
             ProcessStateNames, ProcessState, ProcessStatus,
             NodeStatus, NodeStatusUVE)
 
+    def get_node_status_class(self):
+        return NodeStatus
+
+    def get_node_status_uve_class(self):
+        return NodeStatusUVE
+
     def get_process_state(self, fail_status_bits):
         return self.get_process_state_base(
             fail_status_bits, ProcessStateNames, ProcessState)
@@ -251,10 +258,6 @@ def cassandra_repair(self):
                                "--debug"])
     #end cassandra_repair
 
-    def send_disk_usage_info(self):
-        self.send_disk_usage_info_base(
-            NodeStatusUVE, NodeStatus, DiskPartitionUsageStats)
-
     def runforever(self, test=False):
         prev_current_time = int(time.time())
         while 1:
diff --git a/src/nodemgr/setup.py b/src/nodemgr/setup.py
index 309eb93c4b2..6dea1282313 100644
--- a/src/nodemgr/setup.py
+++ b/src/nodemgr/setup.py
@@ -12,7 +12,9 @@
               'nodemgr.config_nodemgr',
               'nodemgr.database_nodemgr',
               'nodemgr.vrouter_nodemgr',
-              'nodemgr.common'],
+              'nodemgr.common',
+              'nodemgr.common.sandesh',
+              'nodemgr.common.sandesh.cpuinfo'],
     package_data={'': ['*.html', '*.css', '*.xml']},
     zip_safe=False,
     long_description="Nodemgr Implementation",
diff --git a/src/nodemgr/vrouter_nodemgr/vrouter_event_manager.py b/src/nodemgr/vrouter_nodemgr/vrouter_event_manager.py
index 2b0f5fcd992..89d98ca9cf1 100644
--- a/src/nodemgr/vrouter_nodemgr/vrouter_event_manager.py
+++ b/src/nodemgr/vrouter_nodemgr/vrouter_event_manager.py
@@ -38,7 +38,7 @@
     NodeStatusUVE, NodeStatus
 from pysandesh.connection_info import ConnectionState
 from vrouter.vrouter.process_info.ttypes import \
-    ProcessStatus, ProcessState, ProcessInfo, DiskPartitionUsageStats
+    ProcessStatus, ProcessState, ProcessInfo
 from vrouter.vrouter.process_info.constants import \
     ProcessStateNames
 
@@ -71,6 +71,7 @@ def __init__(self, rule_file, discovery_server,
             NodeStatusUVE, NodeStatus)
 
         self.lb_stats = LoadbalancerStats()
+        self.send_system_cpu_info()
     # end __init__
 
     def msg_log(self, msg, level):
@@ -94,14 +95,16 @@ def send_nodemgr_process_status(self):
             ProcessStateNames, ProcessState, ProcessStatus,
             NodeStatus, NodeStatusUVE)
 
+    def get_node_status_class(self):
+        return NodeStatus
+
+    def get_node_status_uve_class(self):
+        return NodeStatusUVE
+
     def get_process_state(self, fail_status_bits):
         return self.get_process_state_base(
             fail_status_bits, ProcessStateNames, ProcessState)
 
-    def send_disk_usage_info(self):
-        self.send_disk_usage_info_base(
-            NodeStatusUVE, NodeStatus, DiskPartitionUsageStats)
-
     def get_process_stat_object(self, pname):
         return VrouterProcessStat(pname)
 
diff --git a/src/opserver/cpuinfo.py b/src/opserver/cpuinfo.py
index cc645f862c9..c8db368a901 100644
--- a/src/opserver/cpuinfo.py
+++ b/src/opserver/cpuinfo.py
@@ -32,6 +32,8 @@ def _get_sys_mem_info(self):
         sys_mem_info.total = virtmem_info.total/1024
         sys_mem_info.used = virtmem_info.used/1024
         sys_mem_info.free = virtmem_info.free/1024
+        sys_mem_info.buffers = virtmem_info.buffers/1024
+        sys_mem_info.cached = virtmem_info.cached/1024
         return sys_mem_info
     #end _get_sys_mem_info
 
diff --git a/src/vnsw/agent/uve/cpuinfo.py b/src/vnsw/agent/uve/cpuinfo.py
index 101f9dafe8b..48e0536d1b6 100644
--- a/src/vnsw/agent/uve/cpuinfo.py
+++ b/src/vnsw/agent/uve/cpuinfo.py
@@ -24,6 +24,8 @@ def _get_sys_mem_info(self):
         sys_mem_info.total = virtmem_info.total/1024
         sys_mem_info.used = virtmem_info.used/1024
         sys_mem_info.free = virtmem_info.free/1024
+        sys_mem_info.buffers = virtmem_info.buffers/1024
+        sys_mem_info.cached = virtmem_info.cached/1024
         return sys_mem_info
     #end _get_sys_mem_info
 
diff --git a/src/vnsw/agent/uve/vrouter.sandesh b/src/vnsw/agent/uve/vrouter.sandesh
index 15e17f773ca..ab1d5b531f2 100644
--- a/src/vnsw/agent/uve/vrouter.sandesh
+++ b/src/vnsw/agent/uve/vrouter.sandesh
@@ -335,6 +335,9 @@ struct NodeStatus {
     6: optional list disk_usage_info (tags="")
     7: optional string description
     8: optional list all_core_file_list
+    10: optional list<cpuinfo.ProcessCpuInfo> process_mem_cpu_usage (aggtype="union")
+    11: optional cpuinfo.SystemMemCpuUsage system_mem_cpu_usage
+    12: optional cpuinfo.SystemCpuInfo system_cpu_info
 }
 
 /**
diff --git a/src/vnsw/agent/uve/vrouter_uve_entry_base.cc b/src/vnsw/agent/uve/vrouter_uve_entry_base.cc
index 4af636cdf22..11ed825a2d8 100644
--- a/src/vnsw/agent/uve/vrouter_uve_entry_base.cc
+++ b/src/vnsw/agent/uve/vrouter_uve_entry_base.cc
@@ -794,7 +794,9 @@ void VrouterUveEntryBase::BuildAndSendComputeCpuStateMsg(const CpuLoadInfo &info
     ainfo.set_cpu_share(info.get_cpu_share());
     ainfo.set_mem_virt(info.get_meminfo().get_virt());
     ainfo.set_mem_res(info.get_meminfo().get_res());
-    ainfo.set_used_sys_mem(info.get_sys_mem_info().get_used());
+    const SysMemInfo &sys_mem_info(info.get_sys_mem_info());
+    ainfo.set_used_sys_mem(sys_mem_info.get_used() -
+        sys_mem_info.get_buffers() - sys_mem_info.get_cached());
     ainfo.set_one_min_cpuload(info.get_cpuload().get_one_min_avg());
     aciv.push_back(ainfo);
     astate.set_cpu_info(aciv);