/
contrail-database.initd
executable file
·157 lines (145 loc) · 4.89 KB
/
contrail-database.initd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#! /bin/sh
### BEGIN INIT INFO
# Provides: contrail-database wrapper over cassandra
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: contrail database wrapper over cassandra
# Description: Determines the time since last cassandra shutdown:
# 1. If greater than gc_grace_seconds, then does not start
# cassandra
# 2. If less than gc_grace_seconds and greater than hinted
# handoff time, then starts cassandra and starts
# nodetool repair
# 3. If less than hinted handoff time, then starts
# cassandra
### END INIT INFO
# Author: Megh Bhatt <meghb@juniper.net>
# Name of the wrapped service; also used to build the pidfile path.
NAME="cassandra"
PIDFILE="/var/run/${NAME}.pid"
# Command prefix used to delegate to the wrapped init script. It is expanded
# unquoted on purpose so that it splits into "service" and the service name.
SERVICE="service ${NAME}"
# File whose modification time is used as the "last seen up" timestamp.
CLUSTER_STATUS_UP_FILE="/var/log/cassandra/status-up"
# 864000 s = 10 days
DEFAULT_GC_GRACE_SECONDS="864000"
# 10800 s = 3 hours
DEFAULT_HINTED_HANDOFF_SECONDS="10800"
# External helpers invoked by do_start
REPAIR="/usr/bin/contrail-cassandra-repair"
NODETOOL="/usr/bin/nodetool"
#
# Function that determines secs since last stop of cassandra
#
seconds_since_last_stop()
{
    # Sets the global SECONDS_SINCE_LAST_STOP to the number of seconds
    # between now and the modification time of $CLUSTER_STATUS_UP_FILE.
    # The result is 0 when the file does not exist or its timestamp cannot
    # be read. Always returns 0.
    local time_last_up now

    SECONDS_SINCE_LAST_STOP=0
    [ -f "$CLUSTER_STATUS_UP_FILE" ] || return 0

    # Declaration and assignment are kept separate so that a failing stat
    # (file removed meanwhile, unreadable directory, ...) is detected instead
    # of feeding an empty value to the arithmetic expansion below.
    time_last_up=$(stat -c %Y -- "$CLUSTER_STATUS_UP_FILE" 2>/dev/null) || return 0
    [ -n "$time_last_up" ] || return 0
    now=$(date +%s) || return 0

    SECONDS_SINCE_LAST_STOP=$((now - time_last_up))
    return 0
}
#
# Function that determines the maximum allowed down seconds. By default
# this is 90% of DEFAULT_GC_GRACE_SECONDS
#
max_allowed_down_seconds()
{
    # Publishes nine tenths of DEFAULT_GC_GRACE_SECONDS (integer division)
    # in the global MAX_ALLOWED_DOWN_SECONDS.
    MAX_ALLOWED_DOWN_SECONDS=$(( 9 * DEFAULT_GC_GRACE_SECONDS / 10 ))
}
#
# Function that determines the maximum down time, in seconds, allowed
# without the need to run repair. By default this is 90% of
# DEFAULT_HINTED_HANDOFF_SECONDS
#
max_down_seconds_before_repair()
{
    # Publishes nine tenths of DEFAULT_HINTED_HANDOFF_SECONDS (integer
    # division) in the global MAX_DOWN_SECONDS_BEFORE_REPAIR.
    MAX_DOWN_SECONDS_BEFORE_REPAIR=$(( 9 * DEFAULT_HINTED_HANDOFF_SECONDS / 10 ))
}
#
# Function that returns 0 if process is running, or nonzero if not.
#
# The nonzero value is 3 if the process is simply not running, and 1 if the
# process is not running but the pidfile exists (to match the exit codes for
# the "status" command; see LSB core spec 3.1, section 20.2)
#
# Extended regular expression that the command line of the process named in
# $PIDFILE must match for the daemon to be considered running.
CMD_PATT="-user.cassandra.+CassandraDaemon"
is_running()
{
    # Returns
    #   0 if the pid stored in $PIDFILE belongs to a process whose command
    #     line matches $CMD_PATT
    #   1 if $PIDFILE exists but no such process is found
    #   3 if $PIDFILE does not exist
    local pid

    [ -f "$PIDFILE" ] || return 3

    pid=$(cat "$PIDFILE" 2>/dev/null)
    # An empty or non-numeric pid cannot name a /proc entry.
    case "$pid" in
        ''|*[!0-9]*) return 1 ;;
    esac

    # /proc/<pid>/cmdline separates arguments with NUL bytes. They are turned
    # into spaces so that the pattern, which spans several arguments, is
    # matched on a single text line regardless of how grep treats binary
    # input. The pattern starts with "-", so it is passed with -e to keep
    # grep from parsing it as a bundle of options.
    if { tr '\0' ' ' < "/proc/$pid/cmdline"; } 2>/dev/null |
        grep -Eq -e "$CMD_PATT"; then
        return 0
    fi
    return 1
}
#
# Function that starts the daemon/service
#
# Helper for do_start: succeeds when "$NODETOOL status" has a line containing
# the host ID given as $1 whose first column contains the letter "U".
_cassandra_node_is_up()
{
    local state
    # An empty ID would make grep match every line; treat it as "not up".
    [ -n "$1" ] || return 1
    state=$($NODETOOL status | grep -F -- "$1" | awk '{print $1}')
    case "$state" in
        *U*) return 0 ;;
    esac
    return 1
}

do_start()
{
    # Starts cassandra through "$SERVICE start" depending on how long it has
    # been down (see seconds_since_last_stop):
    #   - less than MAX_DOWN_SECONDS_BEFORE_REPAIR: plain start
    #   - less than MAX_ALLOWED_DOWN_SECONDS: start, wait (up to 10 polls,
    #     1 second apart, for each step) until nodetool answers and the node
    #     shows up in "nodetool status", then launch $REPAIR in the
    #     background
    #   - otherwise: refuse to start
    #
    # Return
    # 0 if daemon has been started (repair is skipped with a message when
    #   the node did not become ready in time)
    # 1 if daemon was already running
    # 2 if the daemon was down for too long and has not been started
    # any other non-zero value: the exit status of "$SERVICE start"
    local stat icounter ncounter nodeid

    is_running && return 1
    max_allowed_down_seconds
    max_down_seconds_before_repair
    seconds_since_last_stop

    if [ "$SECONDS_SINCE_LAST_STOP" -lt "$MAX_DOWN_SECONDS_BEFORE_REPAIR" ]; then
        # Short outage: just call the cassandra init script.
        # $SERVICE is deliberately unquoted, it holds "service <name>".
        $SERVICE start
        return "$?"
    fi

    if ! [ "$SECONDS_SINCE_LAST_STOP" -lt "$MAX_ALLOWED_DOWN_SECONDS" ]; then
        echo "Cassandra has been down for at least $MAX_ALLOWED_DOWN_SECONDS seconds, not starting"
        return 2
    fi

    # Medium outage: start cassandra, then repair once it is ready.
    $SERVICE start
    stat=$?
    # Report a failed start to the caller instead of silently returning 0.
    [ "$stat" -eq 0 ] || return "$stat"

    # Wait for cassandra to startup and join the cluster
    echo "Waiting for cassandra initialization to complete..."
    icounter=0
    until $NODETOOL info > /dev/null 2>&1; do
        if [ "$icounter" -ge 10 ]; then
            echo "Cassandra initialization not yet complete, please run cassandra repair after initialization is done"
            return 0
        fi
        sleep 1
        icounter=$((icounter + 1))
    done

    # Extract node ID to check cluster joining status
    echo "Waiting for cassandra to join cluster..."
    nodeid=$($NODETOOL info | awk '/ID/ {print $3}')
    ncounter=0
    # The decision is taken on the node status itself, not on the retry
    # counter, so a node that joins on the very last poll is still repaired.
    until _cassandra_node_is_up "$nodeid"; do
        if [ "$ncounter" -ge 10 ]; then
            echo "Cassandra not yet joined cluster, please run cassandra repair after cluster formation"
            return 0
        fi
        sleep 1
        ncounter=$((ncounter + 1))
    done

    # Start nodetool repair
    echo "Starting cassandra repair, check /var/log/cassandra/repair-*.log for progress"
    # POSIX redirection: "&>" is a bashism that /bin/sh implementations such
    # as dash parse as "run in background" followed by a separate "> file".
    $REPAIR --log-file /var/log/cassandra/repair.log > /dev/null 2>&1 &
    return 0
}
# Only "start" is handled by this wrapper; every other action is forwarded
# unchanged to the wrapped service and its exit status becomes ours.
# NOTE(review): the return value of do_start is not used; the trailing ":"
# makes the script exit 0 after "start" whatever do_start returned.
if [ "$1" = "start" ]; then
    do_start
else
    $SERVICE "$1"
    exit "$?"
fi
:
# vi:ai sw=4 ts=4 tw=0 et