Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Packaging changes to faciliate monitoring of cassandra:
1. Addition of a wrapper init.d script - contrail-database that controls when cassandra can be started based on the time since last started 2. Move cassandra out of supervisord Partial-Bug: #1484297 Conflicts: common/control_files/supervisord_database.conf Change-Id: Ie3b550fb0e3d9ba966aeaac9727bfd01581443f7 (cherry picked from commit bbaf90f)
- Loading branch information
Megh Bhatt
committed
Sep 22, 2015
1 parent
f148314
commit e46317b
Showing
4 changed files
with
164 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,157 @@ | ||
#!/usr/bin/env bash | ||
#! /bin/sh | ||
### BEGIN INIT INFO | ||
# Provides: contrail-database wrapper over cassandra | ||
# Default-Start: 2 3 4 5 | ||
# Default-Stop: 0 1 6 | ||
# Short-Description: contrail database wrapper over cassandra | ||
# Description: Determines the time since last cassandra shutdown: | ||
# 1. If greater than gc_grace_seconds, then does not start | ||
# cassandra | ||
# 2. If less than gc_grace_seconds and greater than hinted | ||
# handoff time, then starts cassandra and starts | ||
# nodetool repair | ||
# 3. If less than hinted handoff time, then starts | ||
# cassandra | ||
### END INIT INFO | ||
|
||
# chkconfig: 2345 99 01 | ||
# description: Juniper Network Virtualization Collector | ||
# Author: Megh Bhatt <meghb@juniper.net> | ||
|
||
supervisorctl -s unix:///tmp/supervisord_database.sock ${1} `basename ${0}` | ||
NAME=cassandra | ||
PIDFILE=/var/run/$NAME.pid | ||
SERVICE="service $NAME" | ||
CLUSTER_STATUS_UP_FILE=/var/log/cassandra/status-up | ||
DEFAULT_GC_GRACE_SECONDS=864000 | ||
DEFAULT_HINTED_HANDOFF_SECONDS=10800 | ||
REPAIR=/usr/bin/contrail-cassandra-repair | ||
NODETOOL=/usr/bin/nodetool | ||
|
||
# | ||
# Function that determines secs since last stop of cassandra | ||
# | ||
seconds_since_last_stop() | ||
{ | ||
if [ -f $CLUSTER_STATUS_UP_FILE ]; then | ||
local time_last_up=`stat -c %Y $CLUSTER_STATUS_UP_FILE` | ||
local now=`date +%s` | ||
SECONDS_SINCE_LAST_STOP=$((now - time_last_up)) | ||
else | ||
SECONDS_SINCE_LAST_STOP=0 | ||
fi | ||
} | ||
|
||
# | ||
# Function that determines the maximum allowed down seconds. By default | ||
# this is 90% of DEFAULT_GC_GRACE_SECONDS | ||
# | ||
max_allowed_down_seconds() | ||
{ | ||
MAX_ALLOWED_DOWN_SECONDS=$((DEFAULT_GC_GRACE_SECONDS * 9 / 10)) | ||
} | ||
|
||
# | ||
# Function that determines the maximum down time allowed without | ||
# the need to run repair in secs, By default this is %90 of | ||
# DEFAULT_HINTED_HANDOFF_SECONDS | ||
# | ||
max_down_seconds_before_repair() | ||
{ | ||
MAX_DOWN_SECONDS_BEFORE_REPAIR=$((DEFAULT_HINTED_HANDOFF_SECONDS * 9 / 10)) | ||
} | ||
|
||
# | ||
# Function that returns 0 if process is running, or nonzero if not. | ||
# | ||
# The nonzero value is 3 if the process is simply not running, and 1 if the | ||
# process is not running but the pidfile exists (to match the exit codes for | ||
# the "status" command; see LSB core spec 3.1, section 20.2) | ||
# | ||
CMD_PATT="-user.cassandra.+CassandraDaemon" | ||
is_running() | ||
{ | ||
if [ -f $PIDFILE ]; then | ||
pid=`cat $PIDFILE` | ||
grep -Eq "$CMD_PATT" "/proc/$pid/cmdline" 2>/dev/null && return 0 | ||
return 1 | ||
fi | ||
return 3 | ||
} | ||
|
||
# | ||
# Function that starts the daemon/service | ||
# | ||
do_start() | ||
{ | ||
# Return | ||
# 0 if daemon has been started | ||
# 1 if daemon was already running | ||
# 2 if daemon could not be started | ||
is_running && return 1 | ||
|
||
max_allowed_down_seconds | ||
max_down_seconds_before_repair | ||
seconds_since_last_stop | ||
|
||
if [ "$SECONDS_SINCE_LAST_STOP" -lt "$MAX_DOWN_SECONDS_BEFORE_REPAIR" ]; then | ||
# Now call the cassandra init script | ||
$SERVICE start | ||
stat=$? | ||
return "$stat" | ||
elif [ "$SECONDS_SINCE_LAST_STOP" -ge "$MAX_DOWN_SECONDS_BEFORE_REPAIR" ] && | ||
[ "$SECONDS_SINCE_LAST_STOP" -lt "$MAX_ALLOWED_DOWN_SECONDS" ]; then | ||
# Now call the cassandra init script | ||
$SERVICE start | ||
stat=$? | ||
if [ "$stat" -eq 0 ]; then | ||
# Wait for cassandra to startup and join the cluster | ||
echo "Waiting for cassandra initialization to complete..." | ||
$NODETOOL info > /dev/null 2>&1 | ||
istatus=$? | ||
icounter=0 | ||
while [ "$istatus" -ne 0 ] && [ "$icounter" -lt 10 ]; do | ||
sleep 1 | ||
$NODETOOL info > /dev/null 2>&1 | ||
istatus=$? | ||
icounter=$(($icounter + 1)) | ||
done | ||
if [ "$istatus" -ne 0 ]; then | ||
echo "Cassandra initialization not yet complete, please run cassandra repair after initialization is done" | ||
return 0 | ||
fi | ||
# Extract node ID to check cluster joining status | ||
echo "Waiting for cassandra to join cluster..." | ||
nodeid=$($NODETOOL info | grep ID | awk '{print $3}') | ||
nstatus=$($NODETOOL status | grep $nodeid | awk '{print $1}') | ||
ncounter=0 | ||
while [ -z $(echo $nstatus | grep U) ] && [ "$ncounter" -lt 10 ]; do | ||
sleep 1 | ||
nstatus=$($NODETOOL status | grep $nodeid | awk '{print $1}') | ||
ncounter=$(($ncounter + 1)) | ||
done | ||
if [ "$ncounter" -ge 10 ]; then | ||
echo "Cassandra not yet joined cluster, please run cassandra repair after cluster formation" | ||
return 0 | ||
fi | ||
# Start nodetool repair | ||
echo "Starting cassandra repair, check /var/log/cassandra/repair-*.log for progress" | ||
$REPAIR --log-file /var/log/cassandra/repair.log &> /dev/null & | ||
fi | ||
else | ||
echo "Cassandra has been down for at least $MAX_ALLOWED_DOWN_SECONDS seconds, not starting" | ||
return 2 | ||
fi | ||
} | ||
|
||
case "$1" in | ||
start) | ||
do_start | ||
;; | ||
*) | ||
$SERVICE "$1" | ||
stat=$? | ||
exit "$stat" | ||
;; | ||
esac | ||
|
||
: | ||
|
||
# vi:ai sw=4 ts=4 tw=0 et |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters