Skip to content

Commit

Permalink
This commit adds reporting of cassandra thread pool stats
Browse files Browse the repository at this point in the history
for monitoring. It collects the nodetool output and
reports it every minute.
Closes-Bug: 1583733,1588156,1589039

(cherry picked from commit eda3203)

Conflicts:
	src/nodemgr/database_nodemgr/database_event_manager.py

send_database_status used the inner structure
cassandra_compaction_task without initializing it;
this fix initializes it before it is used.

(cherry picked from commit 49f6637)

The output of 'nodetool compactionstats' can contain multiple lines,
so we should grep for the line containing 'pending tasks' before
extracting the value.

(cherry picked from commit f652617)

Change-Id: I78993df71a32295e9697768e4b64592e6c4c9405
  • Loading branch information
arvindvis authored and Raj Reddy committed Jun 7, 2016
1 parent 1cf8411 commit 214d61e
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 2 deletions.
27 changes: 27 additions & 0 deletions src/analytics/database/database.sandesh
Expand Up @@ -67,3 +67,30 @@ struct NodeStatus {
uve sandesh NodeStatusUVE {
1: NodeStatus data
}

// Statistics for one Cassandra thread pool, filled in from a single row of
// `nodetool tpstats` output by get_tp_status in database_event_manager.py.
struct CassandraThreadPoolStats {
// First column of the tpstats row (the pool name)
1: string pool_name
// Second column of the tpstats row, parsed as an integer
2: u64 active
// Third column of the tpstats row, parsed as an integer
3: u64 pending
// Sixth column of the tpstats row, parsed as an integer
4: u64 all_time_blocked
}

// Compaction backlog reported by `nodetool compactionstats`.
struct CassandraCompactionTask {
// Integer parsed from the 'pending tasks:' line of the command output
1: u64 pending_compaction_tasks
}
// Payload of CassandraStatusUVE; populated by send_database_status in
// database_event_manager.py.
struct CassandraStatusData {
// Set by the sender to socket.gethostname()
1: string name (key="ObjectDatabaseInfo")
2: optional bool deleted
// Pending compaction count taken from `nodetool compactionstats`
3: CassandraCompactionTask cassandra_compaction_task (tags="")
// One entry per `nodetool tpstats` row whose pool name is in ThreadPoolNames
4: list<CassandraThreadPoolStats> thread_pool_stats (tags=".pool_name")
}

/**
* @description: For monitoring nodetool tpstats and compaction pending counts
* contrail-database-nodemgr
* @object: database-node
*/
uve sandesh CassandraStatusUVE {
// Compaction and thread pool statistics carried by this UVE
1: CassandraStatusData data
}

66 changes: 64 additions & 2 deletions src/nodemgr/database_nodemgr/database_event_manager.py
Expand Up @@ -31,13 +31,14 @@
from sandesh_common.vns.ttypes import Module, NodeType
from sandesh_common.vns.constants import ModuleNames, NodeTypeNames,\
Module2NodeType, INSTANCE_ID_DEFAULT, SERVICE_CONTRAIL_DATABASE, \
RepairNeededKeyspaces
RepairNeededKeyspaces, ThreadPoolNames
from subprocess import Popen, PIPE
from StringIO import StringIO

from database.sandesh.database.ttypes import \
NodeStatusUVE, NodeStatus, DatabaseUsageStats,\
DatabaseUsageInfo, DatabaseUsage
DatabaseUsageInfo, DatabaseUsage, CassandraStatusUVE,\
CassandraStatusData,CassandraThreadPoolStats, CassandraCompactionTask
from pysandesh.connection_info import ConnectionState
from database.sandesh.database.process_info.ttypes import \
ProcessStatus, ProcessState, ProcessInfo
Expand Down Expand Up @@ -249,12 +250,73 @@ def database_periodic(self):
else:
self.fail_status_bits &= ~self.FAIL_STATUS_SERVER_PORT
self.send_nodemgr_process_status()
# Send cassandra nodetool information
self.send_database_status()
# Record cluster status and shut down cassandra if needed
subprocess.Popen(["contrail-cassandra-status",
"--log-file", "/var/log/cassandra/status.log",
"--debug"])
# end database_periodic

def send_database_status(self):
    """Collect Cassandra nodetool statistics and send them as a UVE.

    Runs `nodetool compactionstats` (filtered to the 'pending tasks:' line)
    and `nodetool tpstats`, fills a CassandraStatusData keyed by the local
    hostname and sends it wrapped in a CassandraStatusUVE.

    This is best-effort: if either command exits non-zero, or its output
    cannot be parsed, an error is logged through self.msg_log and nothing
    is sent for this cycle.
    """
    cassandra_status = CassandraStatusData()
    # The nested struct must exist before its field is assigned below.
    cassandra_status.cassandra_compaction_task = CassandraCompactionTask()

    # Get compactionstats. The command is a shell pipeline, so the exit
    # status checked below is that of grep: non-zero means no
    # 'pending tasks:' line was produced.
    compaction_count = subprocess.Popen(
        "nodetool compactionstats|grep 'pending tasks:'",
        shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    op, err = compaction_count.communicate()
    if compaction_count.returncode != 0:
        msg = "Failed to get nodetool compactionstats " + err
        self.msg_log(msg, level=SandeshLevel.SYS_ERR)
        return
    try:
        pending_tasks = self.get_pending_compaction_count(op)
    except (ValueError, IndexError) as parse_err:
        # Unexpected output format: log and skip, like a command failure,
        # instead of letting the exception escape the periodic handler.
        msg = "Failed to parse nodetool compactionstats output: " + \
            str(parse_err)
        self.msg_log(msg, level=SandeshLevel.SYS_ERR)
        return
    cassandra_status.cassandra_compaction_task.pending_compaction_tasks = \
        pending_tasks

    # Get the tpstats value
    tpstats_op = subprocess.Popen(["nodetool", "tpstats"],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
    op, err = tpstats_op.communicate()
    if tpstats_op.returncode != 0:
        msg = "Failed to get nodetool tpstats " + err
        self.msg_log(msg, level=SandeshLevel.SYS_ERR)
        return
    try:
        cassandra_status.thread_pool_stats = self.get_tp_status(op)
    except (ValueError, IndexError) as parse_err:
        msg = "Failed to parse nodetool tpstats output: " + str(parse_err)
        self.msg_log(msg, level=SandeshLevel.SYS_ERR)
        return

    cassandra_status.name = socket.gethostname()
    cassandra_status_uve = CassandraStatusUVE(data=cassandra_status)
    msg = 'Sending UVE: ' + str(cassandra_status_uve)
    self.sandesh_global.logger().log(SandeshLogger.get_py_logger_level(
        SandeshLevel.SYS_DEBUG), msg)
    cassandra_status_uve.send()
# end send_database_status

def get_pending_compaction_count(self, pending_count):
    """Return the pending compaction task count as an int.

    pending_count is text from `nodetool compactionstats`; the relevant
    line has the format 'pending tasks: x'. If the text has several lines,
    the first one containing 'pending tasks:' is used, so extra lines no
    longer break the parse. If no line contains the marker, the whole
    stripped text is parsed as before.

    Raises IndexError if the selected text has no ':' and ValueError if
    the text after the first ':' is not an integer.
    """
    text = pending_count.strip()
    for line in text.splitlines():
        if 'pending tasks:' in line:
            text = line
            break
    # output is of the format pending tasks: x
    return int(text.split(':')[1].strip())
# end get_pending_compaction_count

def get_tp_status(self,tp_stats_output):
    """Parse `nodetool tpstats` text into a list of CassandraThreadPoolStats.

    The first line is skipped (presumably the column header - confirm
    against nodetool output). Each following line is split on whitespace.
    Parsing stops at the first line with two or fewer columns, which marks
    the end of the thread pool section. A line is reported only when its
    first column is in ThreadPoolNames.

    Column 5 is read for all_time_blocked, so a line with fewer than six
    columns is skipped rather than raising IndexError.

    Raises ValueError if a reported numeric column is not an integer.
    """
    # Highest column index read below is 5.
    required_cols = 6
    thread_pool_stats_list = []
    for row in tp_stats_output.split('\n')[1:]:
        cols = row.split()
        if len(cols) <= 2:
            # Reached end of tpstats, breaking because dropstats follows
            break
        if len(cols) < required_cols or cols[0] not in ThreadPoolNames:
            continue
        # Create a CassandraThreadPoolStats for matching entries
        tpstat = CassandraThreadPoolStats()
        tpstat.pool_name = cols[0]
        tpstat.active = int(cols[1])
        tpstat.pending = int(cols[2])
        tpstat.all_time_blocked = int(cols[5])
        thread_pool_stats_list.append(tpstat)
    return thread_pool_stats_list
# end get_tp_status

def cassandra_repair(self):
logdir = self.cassandra_repair_logdir + "repair.log"
subprocess.Popen(["contrail-cassandra-repair",
Expand Down
14 changes: 14 additions & 0 deletions src/sandesh/common/vns.sandesh
Expand Up @@ -490,3 +490,17 @@ const list<string> RepairNeededKeyspaces = [

// Set to default of 10 days
const u32 CASSANDRA_DEFAULT_GC_GRACE_SECONDS = 864000

// Names of the Cassandra thread pools that are reported.
// get_tp_status in database_event_manager.py keeps only the
// `nodetool tpstats` rows whose first column is in ThreadPoolNames.
const string READSTAGE = "ReadStage"
const string MUTATIONSTAGE = "MutationStage"
const string MEMTABLEFLUSHWRITER = "MemtableFlushWriter"
const string NATIVETRANSPORTREQUESTS = "Native-Transport-Requests"
const string COMPACTIONEXECUTOR = "CompactionExecutor"
const list<string> ThreadPoolNames = [
READSTAGE,
MUTATIONSTAGE,
MEMTABLEFLUSHWRITER,
NATIVETRANSPORTREQUESTS,
COMPACTIONEXECUTOR,
]

0 comments on commit 214d61e

Please sign in to comment.