-
Notifications
You must be signed in to change notification settings - Fork 23
/
contrail-cmon-monitor.sh
executable file
·225 lines (200 loc) · 6.96 KB
/
contrail-cmon-monitor.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
#!/bin/bash
# Purpose of the script is to check the state of galera cluster
# Author - Sanju Abraham

# Site-specific HA parameters. VIP, DIPS, DIPS_SIZE, DIPHOSTS, DIPS_HOST_SIZE
# and PERIODIC_RMQ_CHK_INTER are read further down but never assigned in this
# script, so they are presumably defined in this file - TODO confirm.
source /etc/contrail/ha/cmon_param

# --- Mutable state flags, updated by the checks further down ---
cmon_run=0
viponme=0
haprestart=0

# --- Logging ---
LOGFILE='/var/log/contrail/ha/cmon-monitor.log'

# --- Addresses ---
# IPv4 addresses reported by "ip", one per line; lines matching 127.0.0.1 are
# skipped by the sed filter.
MYIPS=$(ip addr show | sed -n -e '/127.0.0.1/!{s/^[ \t]*inet[ \t]*\([0-9.]\+\)\/.*$/\1/p}')
ARP_CACHE_FLUSH="arp -d ${VIP}"

# --- cmon / mysql / haproxy service commands ---
RUN_STATE='isrunning'
CMON_SVC_CHECK='service cmon status'
RUN_CMON='service cmon start'
STOP_CMON='service cmon stop'
MYSQL_SVC_CHECK='service mysql status'
HAP_RESTART='service haproxy restart'
RMQ_MONITOR='/opt/contrail/bin/contrail-rmq-monitor.sh'

# --- nova state queries through supervisor ---
nova_supervisor_status='supervisorctl -s unix:///tmp/supervisord_openstack.sock status'
NOVA_SCHED_CHK="${nova_supervisor_status} nova-scheduler"
NOVA_CONS_CHK="${nova_supervisor_status} nova-console"
NOVA_CONSAUTH_CHK="${nova_supervisor_status} nova-consoleauth"
NOVA_COND_CHK="${nova_supervisor_status} nova-conductor"

# --- nova service control commands ---
NOVA_SCHED_RST='service nova-scheduler restart'
NOVA_CONS_RST='service nova-console restart'
NOVA_CONSAUTH_RST='service nova-consoleauth restart'
NOVA_COND_RST='service nova-conductor restart'
NOVA_COND_STOP='service nova-conductor stop'
NOVA_COND_START='service nova-conductor start'
NOVA_COND_STATUS='service nova-conductor status'
NOVA_SCHED_STOP='service nova-scheduler stop'
NOVA_SCHED_START='service nova-scheduler start'
NOVA_SCHED_STATUS='service nova-scheduler status'

# --- State keywords compared against status output ---
NOVA_RUN_STATE='RUNNING'
STATE_EXITED='EXITED'
STATE_FATAL='FATAL'

# --- cmon database tuning ---
# The same string is used as both the mysql user name and the password.
cmon_user_pass='cmon'
# Template for one cmon_configuration update: value first, then param name.
cmon_cfg_update_fmt="update cmon_configuration set value=%s where param='%s';"
printf -v SET_CMON_PURGE "$cmon_cfg_update_fmt" 1 'PURGE'
printf -v SET_CMON_SCHEMA_PARAM "$cmon_cfg_update_fmt" 86400 'db_schema_stats_collection_interval'
printf -v SET_CMON_STATS_COLL_PARAM "$cmon_cfg_update_fmt" 1440 'db_stats_collection_interval'
printf -v SET_CMON_HOST_COLL_PARAM "$cmon_cfg_update_fmt" 1440 'host_stats_collection_interval'
printf -v SET_CMON_LOG_COLL_PARAM "$cmon_cfg_update_fmt" 1440 'log_collection_interval'
printf -v SET_CMON_STATS_PARAM "$cmon_cfg_update_fmt" 720 'db_hourly_stats_collection_interval'
printf -v SET_CMON_BACKUP_RETENTION "$cmon_cfg_update_fmt" 1 'BACKUP_RETENTION'
# Print the current wall-clock time as HH:MM:SS (no date part) on stdout.
timestamp() {
  date '+%H:%M:%S'
}
#######################################
# Append an ERROR-level line to the monitor log.
# Globals:   LOGFILE (read) - path of the log file
# Arguments: $1 - message text
# Outputs:   appends "<timestamp>: ERROR: <message>" to $LOGFILE
#######################################
log_error_msg() {
  # local keeps the message out of the caller's variables.
  local msg=$1
  # LOGFILE is quoted so a path containing whitespace does not become an
  # "ambiguous redirect".
  printf '%s: ERROR: %s\n' "$(timestamp)" "$msg" >> "$LOGFILE"
}
#######################################
# Append a WARNING-level line to the monitor log.
# Globals:   LOGFILE (read) - path of the log file
# Arguments: $1 - message text
# Outputs:   appends "<timestamp>: WARNING: <message>" to $LOGFILE
#######################################
log_warn_msg() {
  # local keeps the message out of the caller's variables.
  local msg=$1
  # LOGFILE is quoted so a path containing whitespace does not become an
  # "ambiguous redirect".
  printf '%s: WARNING: %s\n' "$(timestamp)" "$msg" >> "$LOGFILE"
}
#######################################
# Append an INFO-level line to the monitor log.
# Globals:   LOGFILE (read) - path of the log file
# Arguments: $1 - message text
# Outputs:   appends "<timestamp>: INFO: <message>" to $LOGFILE
#######################################
log_info_msg() {
  # local keeps the message out of the caller's variables.
  local msg=$1
  # LOGFILE is quoted so a path containing whitespace does not become an
  # "ambiguous redirect".
  printf '%s: INFO: %s\n' "$(timestamp)" "$msg" >> "$LOGFILE"
}
# Work out whether the VIP is configured on this node: compare it with each
# address collected in MYIPS and set viponme=1 on the first match.
# MYIPS is expanded unquoted on purpose so each address becomes one loop item.
for node_ip in $MYIPS; do
  # Quoted [[ ]] comparison: an empty or unset VIP simply never matches,
  # instead of raising a test-syntax error for every address.
  if [[ "$node_ip" == "$VIP" ]]; then
    viponme=1
    log_info_msg "VIP - $VIP is on this node"
    break
  fi
done
#######################################
# Report whether MySQL is running on this node.
# MySQL counts as running when fields 3 and 4 of the service status output,
# joined together, equal RUN_STATE and pidof finds a mysqld process.
# Globals:   MYSQL_SVC_CHECK (read) - status command
#            RUN_STATE (read)       - expected joined status words
# Arguments: none
# Outputs:   "y" on stdout when running, "n" otherwise; also logs an INFO line
# Returns:   1 when running, 0 when not. NOTE: this is the reverse of the
#            usual shell convention and is kept unchanged for compatibility;
#            rely on the stdout value rather than the status.
#######################################
verify_mysql() {
  local mysqlsvc mysqlpid
  mysqlsvc=$($MYSQL_SVC_CHECK | awk '{print $3 $4}')
  mysqlpid=$(pidof mysqld)
  # Quoted operands: empty or multi-line status output is just "not running"
  # rather than a test-syntax error.
  if [[ "$mysqlsvc" == "$RUN_STATE" && -n "$mysqlpid" ]]; then
    log_info_msg "MySQL is Running"
    echo "y"
    return 1
  else
    log_info_msg "MySQL is not Running"
    echo "n"
    return 0
  fi
}
#######################################
# Report whether cmon is running on this node.
# cmon counts as running when fields 2 and 3 of the service status output,
# joined together, equal RUN_STATE and pidof finds a cmon process.
# Globals:   CMON_SVC_CHECK (read) - status command
#            RUN_STATE (read)      - expected joined status words
# Arguments: none
# Outputs:   "y" on stdout when running, "n" otherwise; also logs an INFO line.
#            The script captures this value with cmon_run=$(verify_cmon).
# Returns:   1 when running, 0 when not. NOTE: this is the reverse of the
#            usual shell convention and is kept unchanged for compatibility.
#######################################
verify_cmon() {
  local cmon cmonpid
  cmon=$($CMON_SVC_CHECK | awk '{print $2 $3}')
  cmonpid=$(pidof cmon)
  # Quoted operands: empty or multi-line status output is just "not running"
  # rather than a test-syntax error.
  if [[ "$cmon" == "$RUN_STATE" && -n "$cmonpid" ]]; then
    log_info_msg "CMON is Running"
    echo "y"
    return 1
  else
    log_info_msg "CMON is not Running"
    echo "n"
    return 0
  fi
}
#######################################
# Report whether nova-conductor is in the running state.
# Field 2 of the NOVA_COND_STATUS output is compared with NOVA_RUN_STATE.
# Globals:   NOVA_COND_STATUS (read) - status command
#            NOVA_RUN_STATE (read)   - state word that means "running"
# Arguments: none
# Outputs:   "y" on stdout when running, "n" otherwise
# Returns:   1 when running, 0 when not. NOTE: this is the reverse of the
#            usual shell convention and is kept unchanged for compatibility.
#######################################
verify_nova_cond() {
  local cond
  cond=$($NOVA_COND_STATUS | awk '{print $2}')
  # Quoted operands: empty or multi-line status output is just "not running"
  # rather than a test-syntax error.
  if [[ "$cond" == "$NOVA_RUN_STATE" ]]; then
    echo "y"
    return 1
  else
    echo "n"
    return 0
  fi
}
#######################################
# Report whether nova-scheduler is in the running state.
# Field 2 of the NOVA_SCHED_STATUS output is compared with NOVA_RUN_STATE.
# Globals:   NOVA_SCHED_STATUS (read) - status command
#            NOVA_RUN_STATE (read)    - state word that means "running"
# Arguments: none
# Outputs:   "y" on stdout when running, "n" otherwise
# Returns:   1 when running, 0 when not. NOTE: this is the reverse of the
#            usual shell convention and is kept unchanged for compatibility.
#######################################
verify_nova_sched() {
  local sched
  sched=$($NOVA_SCHED_STATUS | awk '{print $2}')
  # Quoted operands: empty or multi-line status output is just "not running"
  # rather than a test-syntax error.
  if [[ "$sched" == "$NOVA_RUN_STATE" ]]; then
    echo "y"
    return 1
  else
    echo "n"
    return 0
  fi
}
# These checks will eventually be replaced when we have nodemgr plugged in
# for openstack services
#
# The nova services are visited in a fixed order: scheduler, console,
# consoleauth, conductor. For each one, field 2 of its supervisorctl status
# line is taken as the state; a state of EXITED or FATAL launches the matching
# restart command in a background subshell and writes a log entry.
nova_svc_names=("Nova Scheduler" "Nova Console" "Nova ConsoleAuth" "Nova Conductor")
nova_svc_checks=("$NOVA_SCHED_CHK" "$NOVA_CONS_CHK" "$NOVA_CONSAUTH_CHK" "$NOVA_COND_CHK")
nova_svc_restarts=("$NOVA_SCHED_RST" "$NOVA_CONS_RST" "$NOVA_CONSAUTH_RST" "$NOVA_COND_RST")
for svc_idx in 0 1 2 3; do
  # The command strings are expanded unquoted on purpose so they split into
  # the command word and its arguments.
  state=$(${nova_svc_checks[svc_idx]} | awk '{print $2}')
  case "$state" in
    "$STATE_EXITED" | "$STATE_FATAL")
      # Backgrounded so the monitor does not wait for the restart to finish.
      (exec ${nova_svc_restarts[svc_idx]}) &
      log_info_msg "${nova_svc_names[svc_idx]} restarted becuase of the state $state"
      ;;
  esac
done
# Reconcile cmon with VIP ownership.
#   VIP on this node, cmon not running -> start cmon, apply the cmon settings
#                                         in mysql, launch the RMQ monitor.
#   VIP on this node                   -> optionally sleep
#                                         PERIODIC_RMQ_CHK_INTER and launch the
#                                         RMQ monitor again.
#   VIP elsewhere, cmon running        -> stop cmon; restart haproxy when it
#                                         still has files open that match a
#                                         DIPS / DIPHOSTS entry.
cmon_run=$(verify_cmon)
# Check for cmon and if its the VIP node let cmon run or start it
if [[ "$viponme" -eq 1 ]]; then
  if [[ "$cmon_run" == "n" ]]; then
    # Started in a background subshell; the script does not wait for it.
    (exec $RUN_CMON) &
    log_info_msg "Started CMON on detecting VIP"
    # One mysql call per statement, in the original order, so a failing
    # statement does not prevent the following ones from being applied.
    for cmon_sql in \
      "$SET_CMON_PURGE" \
      "$SET_CMON_SCHEMA_PARAM" \
      "$SET_CMON_STATS_COLL_PARAM" \
      "$SET_CMON_HOST_COLL_PARAM" \
      "$SET_CMON_LOG_COLL_PARAM" \
      "$SET_CMON_STATS_PARAM" \
      "$SET_CMON_BACKUP_RETENTION"; do
      mysql -u"${cmon_user_pass}" -p"${cmon_user_pass}" -e "USE cmon; ${cmon_sql}"
    done
    log_info_msg "Done setting params for cmon"
    (exec $RMQ_MONITOR) &
  fi
  # Check periodically for RMQ status
  if [[ -n "$PERIODIC_RMQ_CHK_INTER" ]]; then
    sleep "$PERIODIC_RMQ_CHK_INTER"
    (exec $RMQ_MONITOR) &
  fi
else
  if [[ "$cmon_run" == "y" ]]; then
    (exec $STOP_CMON) &
    log_info_msg "Stopped CMON on not finding VIP"
    #Check if the VIP was on this node and clear all session by restarting haproxy
    hapid=$(pidof haproxy)
    # pidof separates several PIDs with spaces; lsof -p expects a
    # comma-separated set.
    hapid=${hapid// /,}
    if [[ -n "$hapid" ]]; then
      # A single lsof snapshot is reused for every entry checked below.
      hap_open_files=$(lsof -p "$hapid")
      # Inside (( )) an unset or empty size evaluates to 0, so a missing
      # DIPS_SIZE / DIPS_HOST_SIZE skips its loop instead of raising an
      # arithmetic syntax error.
      dip_candidates=()
      for (( i = 0; i < DIPS_SIZE; i++ )); do
        dip_candidates+=("${DIPS[i]}")
      done
      for (( i = 0; i < DIPS_HOST_SIZE; i++ )); do
        dip_candidates+=("${DIPHOSTS[i]}")
      done
      for dip in "${dip_candidates[@]}"; do
        # An empty entry would match every lsof line; skip it.
        [[ -n "$dip" ]] || continue
        # -F matches the entry literally, so dots in an address are not
        # treated as regex wildcards. Field 9 is kept from the original
        # filter (the NAME column in default lsof output).
        dipsonnonvip=$(grep -F -- "$dip" <<< "$hap_open_files" | awk '{print $9}')
        if [[ -n "$dipsonnonvip" ]]; then
          haprestart=1
          break
        fi
      done
    fi
    if [[ "$haprestart" -eq 1 ]]; then
      (exec $HAP_RESTART) &
      log_info_msg "Restarted HAP becuase of stale dips"
    fi
  fi
fi
#Cleanup if there exists sockets in CLOSE_WAIT
#######################################
# Force-kill every process that netstat reports as owning a socket whose
# address column ends in ":33306" and whose state is CLOSE_WAIT.
# The original guard compared the netstat text with "-ne 0", which is an
# arithmetic test and therefore never succeeded on real netstat lines; the
# guard now checks whether any PID was actually found.
# Globals:   none
# Arguments: none
# Outputs:   logs an INFO line through log_info_msg when PIDs were killed
#######################################
cleanup_close_wait_sockets() {
  local -a stale_pids=()
  # Column 7 of netstat -p output is "PID/Program name"; keep the numeric PID
  # only and drop duplicates so each process is signalled once.
  mapfile -t stale_pids < <(
    netstat -anp \
      | grep ':33306 ' \
      | grep CLOSE_WAIT \
      | awk '{print $7}' \
      | cut -d / -f1 \
      | grep -oE '[[:digit:]]{1,}' \
      | sort -u
  )
  # Nothing to do (and no kill usage error) when no PID was found.
  if (( ${#stale_pids[@]} > 0 )); then
    kill -9 "${stale_pids[@]}"
    log_info_msg "Cleaned connections to mysql that were in CLOSE_WAIT"
  fi
}
cleanup_close_wait_sockets
# Always finish with status 0, whatever the checks above found or restarted.
exit 0