From 06ac304bf66daeb63f8539c77e68444038397f36 Mon Sep 17 00:00:00 2001 From: Megh Bhatt Date: Wed, 12 Aug 2015 16:16:46 -0700 Subject: [PATCH] 1. Change the gc_grace_seconds for config daemon keyspaces to the default of 10 days 2. Run periodic nodetool repair -pr on the config daemon keyspaces from database nodemgr, default is once every 24 hours. This can be controlled via DEFAULT.cassandra_repair_interval config in /etc/contrail/contrail-database-nodemgr.conf Partial-Bug: #1484297 Change-Id: Ief1f010d0ee3f35fc0f5ff8e1e4eac752c5267e2 --- src/config/common/vnc_cassandra.py | 8 ++++---- src/config/schema-transformer/to_bgp.py | 10 +++++----- src/config/svc-monitor/svc_monitor/db.py | 9 ++++----- src/discovery/disc_cassdb.py | 7 +++++-- src/discovery/disc_server.py | 2 +- src/nodemgr/database_event_manager.py | 17 +++++++++++++++-- src/nodemgr/main.py | 9 +++++++-- src/sandesh/common/vns.sandesh | 16 ++++++++++++++++ 8 files changed, 57 insertions(+), 21 deletions(-) diff --git a/src/config/common/vnc_cassandra.py b/src/config/common/vnc_cassandra.py index 1bbb109c75d..d4c6af6f69f 100644 --- a/src/config/common/vnc_cassandra.py +++ b/src/config/common/vnc_cassandra.py @@ -14,13 +14,15 @@ from pysandesh.gen_py.process_info.ttypes import ConnectionStatus, \ ConnectionType from pysandesh.gen_py.sandesh.ttypes import SandeshLevel +from sandesh_common.vns.constants import API_SERVER_KEYSPACE_NAME, \ + CASSANDRA_DEFAULT_GC_GRACE_SECONDS import time import json import utils class VncCassandraClient(VncCassandraClientGen): # Name to ID mapping keyspace + tables - _UUID_KEYSPACE_NAME = 'config_db_uuid' + _UUID_KEYSPACE_NAME = API_SERVER_KEYSPACE_NAME # TODO describe layout _OBJ_UUID_CF_NAME = 'obj_uuid_table' @@ -143,9 +145,7 @@ def _cassandra_ensure_keyspace(self, server_list, # TODO verify only EEXISTS self._logger("Warning! " + str(e), level=SandeshLevel.SYS_WARN) - gc_grace_sec = 0 - if num_dbnodes > 1: - gc_grace_sec = 60 + gc_grace_sec = CASSANDRA_DEFAULT_GC_GRACE_SECONDS for cf_info in cf_info_list: try: diff --git a/src/config/schema-transformer/to_bgp.py b/src/config/schema-transformer/to_bgp.py index fef394388d8..e4c6e40db5e 100644 --- a/src/config/schema-transformer/to_bgp.py +++ b/src/config/schema-transformer/to_bgp.py @@ -43,7 +43,9 @@ from pysandesh.gen_py.sandesh.ttypes import SandeshLevel from cfgm_common.uve.virtual_network.ttypes import * from sandesh_common.vns.ttypes import Module, NodeType -from sandesh_common.vns.constants import ModuleNames, Module2NodeType, NodeTypeNames, INSTANCE_ID_DEFAULT +from sandesh_common.vns.constants import ModuleNames, Module2NodeType, \ + NodeTypeNames, INSTANCE_ID_DEFAULT, SCHEMA_KEYSPACE_NAME, \ + CASSANDRA_DEFAULT_GC_GRACE_SECONDS from schema_transformer.sandesh.st_introspect import ttypes as sandesh import discoveryclient.client as client try: @@ -2650,7 +2652,7 @@ class SchemaTransformer(object): data + methods used/referred to by ssrc and arc greenlets """ - _KEYSPACE = 'to_bgp_keyspace' + _KEYSPACE = SCHEMA_KEYSPACE_NAME _RT_CF = 'route_target_table' _SC_IP_CF = 'service_chain_ip_address_table' _SERVICE_CHAIN_CF = 'service_chain_table' @@ -3594,9 +3596,7 @@ def _cassandra_init(self): rd_consistency = pycassa.cassandra.ttypes.ConsistencyLevel.QUORUM wr_consistency = pycassa.cassandra.ttypes.ConsistencyLevel.QUORUM - gc_grace_sec = 0 - if num_dbnodes > 1: - gc_grace_sec = 60 + gc_grace_sec = CASSANDRA_DEFAULT_GC_GRACE_SECONDS for cf in column_families: try: diff --git a/src/config/svc-monitor/svc_monitor/db.py b/src/config/svc-monitor/svc_monitor/db.py index 9ed9e10b770..50e8113ed6c 100644 --- a/src/config/svc-monitor/svc_monitor/db.py +++ b/src/config/svc-monitor/svc_monitor/db.py @@ -9,7 +9,8 @@ import pycassa from pycassa.system_manager import * from pysandesh.gen_py.process_info.ttypes import ConnectionStatus - +from sandesh_common.vns.constants import SVC_MONITOR_KEYSPACE_NAME, \ + CASSANDRA_DEFAULT_GC_GRACE_SECONDS import inspect import json import time @@ -17,7 +18,7 @@ class ServiceMonitorDB(object): - _KEYSPACE = 'svc_monitor_keyspace' + _KEYSPACE = SVC_MONITOR_KEYSPACE_NAME def __init__(self, args=None): self._args = args @@ -116,9 +117,7 @@ def _cassandra_init(self, cf_name): # set up column families column_families = [cf_name] - gc_grace_sec = 0 - if num_dbnodes > 1: - gc_grace_sec = 60 + gc_grace_sec = CASSANDRA_DEFAULT_GC_GRACE_SECONDS for cf in column_families: try: sys_mgr.create_column_family(self._keyspace, cf, gc_grace_seconds=gc_grace_sec) diff --git a/src/discovery/disc_cassdb.py b/src/discovery/disc_cassdb.py index 0d2ac8510a6..1068692bc71 100644 --- a/src/discovery/disc_cassdb.py +++ b/src/discovery/disc_cassdb.py @@ -16,9 +16,11 @@ from pycassa.system_manager import * from pycassa.util import * from pycassa.types import * +from sandesh_common.vns.constants import DISCOVERY_SERVER_KEYSPACE_NAME, \ + CASSANDRA_DEFAULT_GC_GRACE_SECONDS class DiscoveryCassandraClient(object): - _DISCOVERY_KEYSPACE_NAME = 'DISCOVERY_SERVER' + _DISCOVERY_KEYSPACE_NAME = DISCOVERY_SERVER_KEYSPACE_NAME _DISCOVERY_CF_NAME = 'discovery' @classmethod @@ -97,7 +99,8 @@ def _cassandra_ensure_keyspace(self, server_list, (cf_name, comparator_type, validator_type) = cf_info sys_mgr.create_column_family(keyspace_name, cf_name, comparator_type = comparator_type, default_validation_class = validator_type) - sys_mgr.alter_column_family(keyspace_name, cf_name, gc_grace_seconds=0) + sys_mgr.alter_column_family(keyspace_name, cf_name, + gc_grace_seconds=CASSANDRA_DEFAULT_GC_GRACE_SECONDS) except pycassa.cassandra.ttypes.InvalidRequestException as e: # TODO verify only EEXISTS print "Warning! " + str(e) diff --git a/src/discovery/disc_server.py b/src/discovery/disc_server.py index d7b14310550..f480bfd0c68 100644 --- a/src/discovery/disc_server.py +++ b/src/discovery/disc_server.py @@ -8,10 +8,10 @@ """ import gevent -from disc_cassdb import DiscoveryCassandraClient from gevent import monkey monkey.patch_all() from gevent import hub +from disc_cassdb import DiscoveryCassandraClient import sys import time diff --git a/src/nodemgr/database_event_manager.py b/src/nodemgr/database_event_manager.py index 11443698dce..9c3c0609db5 100644 --- a/src/nodemgr/database_event_manager.py +++ b/src/nodemgr/database_event_manager.py @@ -27,7 +27,8 @@ from pysandesh.gen_py.sandesh_trace.ttypes import SandeshTraceRequest from sandesh_common.vns.ttypes import Module, NodeType from sandesh_common.vns.constants import ModuleNames, NodeTypeNames,\ - Module2NodeType, INSTANCE_ID_DEFAULT, SERVICE_CONTRAIL_DATABASE + Module2NodeType, INSTANCE_ID_DEFAULT, SERVICE_CONTRAIL_DATABASE, \ + RepairNeededKeyspaces from subprocess import Popen, PIPE from StringIO import StringIO @@ -43,7 +44,7 @@ class DatabaseEventManager(EventManager): def __init__(self, rule_file, discovery_server, discovery_port, collector_addr, - hostip, minimum_diskgb): + hostip, minimum_diskgb, cassandra_repair_interval): EventManager.__init__( self, rule_file, discovery_server, discovery_port, collector_addr) @@ -52,6 +53,7 @@ def __init__(self, rule_file, discovery_server, self.module_id = ModuleNames[self.module] self.hostip = hostip self.minimum_diskgb = minimum_diskgb + self.cassandra_repair_interval = cassandra_repair_interval self.supervisor_serverurl = "unix:///tmp/supervisord_database.sock" self.add_current_process() # end __init__ @@ -200,6 +202,14 @@ def database_periodic(self): # end database_periodic + def cassandra_repair(self): + for keyspace in RepairNeededKeyspaces: + repair_file_name = '/var/log/cassandra/repair-' + keyspace + '.log' + with open(repair_file_name, "a") as repair_file: + subprocess.Popen(["nodetool", "repair", "-pr", keyspace], + stdout=repair_file, stderr=repair_file) + #end cassandra_repair + def send_disk_usage_info(self): self.send_disk_usage_info_base( NodeStatusUVE, NodeStatus, DiskPartitionUsageStats) @@ -230,4 +240,7 @@ def runforever(self, test=False): if headers['eventname'].startswith("TICK_60"): self.database_periodic() prev_current_time = self.event_tick_60(prev_current_time) + # Perform nodetool repair every cassandra_repair_interval hours + if self.tick_count % (60 * self.cassandra_repair_interval) == 0: + self.cassandra_repair() self.listener_nodemgr.ok(self.stdout) diff --git a/src/nodemgr/main.py b/src/nodemgr/main.py index 44c18eef36d..bda730585b2 100755 --- a/src/nodemgr/main.py +++ b/src/nodemgr/main.py @@ -67,7 +67,8 @@ def main(args_str=' '.join(sys.argv[1:])): default = {'rules': '', 'collectors': [], 'hostip': '127.0.0.1', - 'minimum_diskgb': 256 + 'minimum_diskgb': 256, + 'cassandra_repair_interval': 24, } node_type = args.nodetype if (node_type == 'contrail-analytics'): @@ -119,6 +120,9 @@ def main(args_str=' '.join(sys.argv[1:])): help="Minimum disk space in GB's") parser.add_argument("--hostip", help="IP address of host") + parser.add_argument("--cassandra_repair_interval", type=int, + help="Time in hours to periodically run " + "nodetool repair for cassandra maintenance") try: _args = parser.parse_args(remaining_argv) except: @@ -157,10 +161,11 @@ def main(args_str=' '.join(sys.argv[1:])): elif (node_type == 'contrail-database'): hostip = _args.hostip minimum_diskgb = _args.minimum_diskgb + cassandra_repair_interval = _args.cassandra_repair_interval prog = DatabaseEventManager( rule_file, discovery_server, discovery_port, collector_addr, - hostip, minimum_diskgb) + hostip, minimum_diskgb, cassandra_repair_interval) else: sys.stderr.write("Node type" + str(node_type) + "is incorrect" + "\n") return diff --git a/src/sandesh/common/vns.sandesh b/src/sandesh/common/vns.sandesh index 3904d22ef9a..7bd745c8b72 100644 --- a/src/sandesh/common/vns.sandesh +++ b/src/sandesh/common/vns.sandesh @@ -432,3 +432,19 @@ const string ALARM_GENERATOR_SERVICE_NAME = "AlarmGenerator" const string IFMAP_SERVER_DISCOVERY_SERVICE_NAME = "IfmapServer" const string XMPP_SERVER_DISCOVERY_SERVICE_NAME = "xmpp-server" const string DNS_SERVER_DISCOVERY_SERVICE_NAME = "dns-server" + +// Cassandra Keyspace Names +const string SVC_MONITOR_KEYSPACE_NAME = "svc_monitor_keyspace" +const string SCHEMA_KEYSPACE_NAME = "to_bgp_keyspace" +const string API_SERVER_KEYSPACE_NAME = "config_db_uuid" +const string DISCOVERY_SERVER_KEYSPACE_NAME = "DISCOVERY_SERVER" + +const list RepairNeededKeyspaces = [ + API_SERVER_KEYSPACE_NAME, + SCHEMA_KEYSPACE_NAME, + SVC_MONITOR_KEYSPACE_NAME, + DISCOVERY_SERVER_KEYSPACE_NAME, +] + +// Set to default of 10 days +const u32 CASSANDRA_DEFAULT_GC_GRACE_SECONDS = 864000