From ab7569b3e202e3f6692958f170c9fc75fd8cff69 Mon Sep 17 00:00:00 2001 From: Ranjeet R Date: Fri, 24 Jul 2015 21:06:45 -0700 Subject: [PATCH] Fixes: Add new controller to an existing cluster Adding a new controller to an existing cluster through fab. Change-Id: Iea83d9e89e41afde999147a24dc6b4a6edc7e1a2 ClosesBug: 1449232 --- fabfile/tasks/ha.py | 323 +++++++++++++++++++++++++------- fabfile/tasks/helpers.py | 127 +++++++++---- fabfile/tasks/install.py | 34 ++++ fabfile/tasks/provision.py | 369 ++++++++++++++++++++++++------------- fabfile/tasks/rabbitmq.py | 131 ++++++++++--- 5 files changed, 731 insertions(+), 253 deletions(-) diff --git a/fabfile/tasks/ha.py b/fabfile/tasks/ha.py index e099aa5f1..f0277dce7 100644 --- a/fabfile/tasks/ha.py +++ b/fabfile/tasks/ha.py @@ -3,11 +3,12 @@ from fabfile.config import * from fabfile.templates import openstack_haproxy, collector_haproxy from fabfile.tasks.helpers import enable_haproxy, verify_mysql_status -from fabfile.utils.fabos import detect_ostype, get_as_sudo +from fabfile.utils.fabos import detect_ostype, get_as_sudo, is_package_installed from fabfile.utils.host import get_authserver_ip, get_control_host_string,\ hstr_to_ip, get_from_testbed_dict, get_service_token, get_env_passwords,\ get_openstack_internal_vip, get_openstack_external_vip,\ get_contrail_internal_vip, get_contrail_external_vip +from fabfile.utils.cluster import get_orchestrator @task @EXECUTE_TASK @@ -18,12 +19,14 @@ def fix_restart_xinetd_conf(): @task def fix_restart_xinetd_conf_node(*args): - """Fix contrail-mysqlprobe to accept connection only from this node, USAGE:fab fix_restart_xinetd_conf_node:user@1.1.1.1,user@2.2.2.2""" + """Fix contrail-mysqlprobe to accept connection only from this node, + USAGE:fab fix_restart_xinetd_conf_node:user@1.1.1.1,user@2.2.2.2""" for host_string in args: - self_ip = hstr_to_ip(get_control_host_string(host_string)) - sudo("sed -i -e 's#only_from = 0.0.0.0/0#only_from = %s 127.0.0.1#' /etc/xinetd.d/contrail-mysqlprobe" % self_ip) - sudo("service xinetd restart") - sudo("chkconfig xinetd on") + with settings(host_string=host_string): + self_ip = hstr_to_ip(get_control_host_string(host_string)) + sudo("sed -i -e 's#only_from = 0.0.0.0/0#only_from = %s 127.0.0.1#' /etc/xinetd.d/contrail-mysqlprobe" % self_ip) + sudo("service xinetd restart") + sudo("chkconfig xinetd on") @task @EXECUTE_TASK @@ -34,10 +37,11 @@ def fix_memcache_conf(): @task def fix_memcache_conf_node(*args): - """Increases the memcached memory to 2048 and listen address to mgmt ip. USAGE:fab fix_memcache_conf_node:user@1.1.1.1,user@2.2.2.2""" + """Increases the memcached memory to 2048 and listen address to mgmt ip. 
+ USAGE:fab fix_memcache_conf_node:user@1.1.1.1,user@2.2.2.2""" memory = '2048' for host_string in args: - listen_ip = hstr_to_ip(env.host_string) + listen_ip = hstr_to_ip(host_string) with settings(host_string=host_string, warn_only=True): if detect_ostype() == 'ubuntu': memcache_conf='/etc/memcached.conf' @@ -59,21 +63,27 @@ def fix_memcache_conf_node(*args): @EXECUTE_TASK @roles('cfgm') def tune_tcp(): - with settings(hide('stderr'), warn_only=True): - if sudo("grep '^net.netfilter.nf_conntrack_max' /etc/sysctl.conf").failed: - sudo('echo "net.netfilter.nf_conntrack_max = 256000" >> /etc/sysctl.conf') - if sudo("grep '^net.netfilter.nf_conntrack_tcp_timeout_time_wait' /etc/sysctl.conf").failed: - sudo('echo "net.netfilter.nf_conntrack_tcp_timeout_time_wait = 30" >> /etc/sysctl.conf') - if sudo("grep '^net.ipv4.tcp_syncookies' /etc/sysctl.conf").failed: - sudo('echo "net.ipv4.tcp_syncookies = 1" >> /etc/sysctl.conf') - if sudo("grep '^net.ipv4.tcp_tw_recycle' /etc/sysctl.conf").failed: - sudo('echo "net.ipv4.tcp_tw_recycle = 1" >> /etc/sysctl.conf') - if sudo("grep '^net.ipv4.tcp_tw_reuse' /etc/sysctl.conf").failed: - sudo('echo "net.ipv4.tcp_tw_reuse = 1" >> /etc/sysctl.conf') - if sudo("grep '^net.ipv4.tcp_fin_timeout' /etc/sysctl.conf").failed: - sudo('echo "net.ipv4.tcp_fin_timeout = 30" >> /etc/sysctl.conf') - if sudo("grep '^net.unix.max_dgram_qlen' /etc/sysctl.conf").failed: - sudo('echo "net.unix.max_dgram_qlen = 1000" >> /etc/sysctl.conf') + """ Tune TCP parameters in all cfgm nodes """ + execute('tune_tcp_node', env.host_string) + +@task +def tune_tcp_node(*args): + for host_string in args: + with settings(host_string=host_string, warn_only=True): + if sudo("grep '^net.netfilter.nf_conntrack_max' /etc/sysctl.conf").failed: + sudo('echo "net.netfilter.nf_conntrack_max = 256000" >> /etc/sysctl.conf') + if sudo("grep '^net.netfilter.nf_conntrack_tcp_timeout_time_wait' /etc/sysctl.conf").failed: + sudo('echo "net.netfilter.nf_conntrack_tcp_timeout_time_wait = 30" >> /etc/sysctl.conf') + if sudo("grep '^net.ipv4.tcp_syncookies' /etc/sysctl.conf").failed: + sudo('echo "net.ipv4.tcp_syncookies = 1" >> /etc/sysctl.conf') + if sudo("grep '^net.ipv4.tcp_tw_recycle' /etc/sysctl.conf").failed: + sudo('echo "net.ipv4.tcp_tw_recycle = 1" >> /etc/sysctl.conf') + if sudo("grep '^net.ipv4.tcp_tw_reuse' /etc/sysctl.conf").failed: + sudo('echo "net.ipv4.tcp_tw_reuse = 1" >> /etc/sysctl.conf') + if sudo("grep '^net.ipv4.tcp_fin_timeout' /etc/sysctl.conf").failed: + sudo('echo "net.ipv4.tcp_fin_timeout = 30" >> /etc/sysctl.conf') + if sudo("grep '^net.unix.max_dgram_qlen' /etc/sysctl.conf").failed: + sudo('echo "net.unix.max_dgram_qlen = 1000" >> /etc/sysctl.conf') def get_nfs_server(): try: @@ -114,13 +124,17 @@ def setup_glance_images_loc(): @serial @hosts(*env.roledefs['openstack'][1:]) def sync_keystone_ssl_certs(): - host_string = env.host_string - temp_dir= tempfile.mkdtemp() - with settings(host_string=env.roledefs['openstack'][0], password=get_env_passwords(env.roledefs['openstack'][0])): - get_as_sudo('/etc/keystone/ssl/', temp_dir) - with settings(host_string=host_string, password=get_env_passwords(host_string)): - put('%s/ssl/' % temp_dir, '/etc/keystone/', use_sudo=True) - sudo('service keystone restart') + execute('sync_keystone_ssl_certs_node', env.host_string) + +@task +def sync_keystone_ssl_certs_node(*args): + for host_string in args: + temp_dir= tempfile.mkdtemp() + with settings(host_string=env.roledefs['openstack'][0], password=get_env_passwords(env.roledefs['openstack'][0])): + 
get_as_sudo('/etc/keystone/ssl/', temp_dir) + with settings(host_string=host_string, password=get_env_passwords(host_string)): + put('%s/ssl/' % temp_dir, '/etc/keystone/', use_sudo=True) + sudo('service keystone restart') @task def fix_wsrep_cluster_address(): @@ -153,8 +167,64 @@ def setup_cluster_monitors(): if len(env.roledefs['openstack']) <= 1: print "Single Openstack cluster, skipping starting cluster monitor." return - sudo("service contrail-hamon restart") - sudo("chkconfig contrail-hamon on") + execute('setup_cluster_monitors_node', env.host_string) + +@task +def setup_cluster_monitors_node(*args): + for host_string in args: + with settings(host_string=host_string): + sudo("service contrail-hamon restart") + sudo("chkconfig contrail-hamon on") + +@task +def join_galera_cluster(new_ctrl_host): + """ Task to join a new into an existing Galera cluster """ + execute('setup_passwordless_ssh', *env.roledefs['openstack']) + + # Adding the user permission for the node to be added in + # the other nodes. + new_ctrl_data_host_string = get_control_host_string(new_ctrl_host) + new_ctrl_ip = new_ctrl_data_host_string.split('@')[1] + + for host_string in env.roledefs['openstack']: + if host_string != new_ctrl_host: + with settings(host_string=host_string): + cmd = "add-mysql-perm --node_to_add %s" % new_ctrl_ip + sudo(cmd) + + openstack_host_list = [get_control_host_string(openstack_host)\ + for openstack_host in env.roledefs['openstack']] + + galera_ip_list = [hstr_to_ip(galera_host)\ + for galera_host in openstack_host_list] + + authserver_ip = get_authserver_ip() + internal_vip = get_openstack_internal_vip() + + with settings(host_string = new_ctrl_host): + sudo("setup-vnc-galera\ + --self_ip %s --keystone_ip %s --galera_ip_list %s\ + --internal_vip %s --openstack_index %d" % (new_ctrl_ip, authserver_ip, + ' '.join(galera_ip_list), internal_vip, + (openstack_host_list.index(new_ctrl_data_host_string) + 1))) + + for host_string in env.roledefs['openstack']: + if host_string != new_ctrl_host: + with settings(host_string=host_string): + self_host = get_control_host_string(env.host_string) + self_ip = hstr_to_ip(self_host) + + cmd = "add-galera-config\ + --node_to_add %s\ + --self_ip %s\ + --keystone_ip %s\ + --galera_ip_list %s\ + --internal_vip %s\ + --openstack_index %d" % (new_ctrl_ip, + self_ip, authserver_ip, + ' '.join(galera_ip_list), internal_vip, + (openstack_host_list.index(self_host) + 1)) + sudo(cmd) @task @serial @@ -189,7 +259,6 @@ def setup_galera_cluster(): cmd += ' --external_vip %s' % external_vip sudo(cmd) - @task def setup_keepalived(): """Task to provision VIP for openstack/cfgm nodes with keepalived""" @@ -198,6 +267,14 @@ def setup_keepalived(): if get_contrail_internal_vip() != get_openstack_internal_vip(): execute('setup_contrail_keepalived') +@task +def join_keepalived_cluster(new_ctrl_host): + """Task to configure a new node into an existing keepalived cluster""" + if get_openstack_internal_vip(): + execute('join_openstack_keepalived_node', new_ctrl_host) + if get_contrail_internal_vip() != get_openstack_internal_vip(): + execute('join_contrail_keepalived_node', new_ctrl_host) + @task @serial @roles('openstack') @@ -211,6 +288,22 @@ def setup_openstack_keepalived(): sudo("service haproxy restart") setup_keepalived_node('openstack') +@task +def join_openstack_keepalived_node(new_ctrl_host): + """Task to provision a new node into a cluster of openstack nodes with keepalived""" + with settings(host_string = new_ctrl_host): + enable_haproxy() + sudo("service haproxy 
restart") + setup_keepalived_node('openstack') + +@task +def join_contrail_keepalived_node(new_ctrl_host): + """Task to provision a new node into a cluster of Contrail CFGM nodes with keepalived""" + with settings(host_string = new_ctrl_host): + enable_haproxy() + sudo("service haproxy restart") + setup_keepalived_node('cfgm') + @task @serial @roles('cfgm') @@ -241,7 +334,7 @@ def setup_keepalived_node(role): sleep(2) print "Waiting for VIP to be associated to MASTER VRRP." continue - + with cd(INSTALLER_DIR): cmd = "setup-vnc-keepalived\ --self_ip %s --internal_vip %s --mgmt_self_ip %s\ @@ -272,7 +365,7 @@ def fixup_restart_haproxy_in_openstack_node(*args): memcached_server_lines = '' rabbitmq_server_lines = '' mysql_server_lines = '' - space = ' ' * 3 + space = ' ' * 3 for host_string in env.roledefs['openstack']: server_index = env.roledefs['openstack'].index(host_string) + 1 @@ -324,7 +417,7 @@ def fixup_restart_haproxy_in_openstack_node(*args): else: mysql_server_lines +=\ '%s server mysql%s %s:3306 weight 100 check inter 2000 rise 2 fall 3 backup\n'\ - % (space, server_index, host_ip) + % (space, server_index, host_ip) for host_string in env.roledefs['openstack']: @@ -504,47 +597,143 @@ def create_and_copy_service_token(): @serial @roles('openstack') def setup_cmon_schema(): - """Task to configure cmon schema in the openstack nodes to monitor galera cluster""" + execute('setup_cmon_schema_node', env.host_string) + +@task +def setup_cmon_schema_node(*args): + """Task to configure cmon schema in the given host to monitor galera cluster""" if len(env.roledefs['openstack']) <= 1: print "Single Openstack cluster, skipping cmon schema setup." return - openstack_host_list = [get_control_host_string(openstack_host)\ - for openstack_host in env.roledefs['openstack']] - galera_ip_list = [hstr_to_ip(galera_host)\ - for galera_host in openstack_host_list] - internal_vip = get_openstack_internal_vip() - - mysql_token = sudo("cat /etc/contrail/mysql.token") - pdist = detect_ostype() - if pdist in ['ubuntu']: - mysql_svc = 'mysql' - elif pdist in ['centos', 'redhat']: - mysql_svc = 'mysqld' + for host_string in args: + openstack_host_list = [get_control_host_string(openstack_host)\ + for openstack_host in env.roledefs['openstack']] + galera_ip_list = [hstr_to_ip(galera_host)\ + for galera_host in openstack_host_list] + internal_vip = get_openstack_internal_vip() + + mysql_token = sudo("cat /etc/contrail/mysql.token") + pdist = detect_ostype() + if pdist in ['ubuntu']: + mysql_svc = 'mysql' + elif pdist in ['centos', 'redhat']: + mysql_svc = 'mysqld' + + # Create cmon schema + sudo('mysql -u root -p%s -e "CREATE SCHEMA IF NOT EXISTS cmon"' % mysql_token) + sudo('mysql -u root -p%s < /usr/local/cmon/share/cmon/cmon_db.sql' % mysql_token) + sudo('mysql -u root -p%s < /usr/local/cmon/share/cmon/cmon_data.sql' % mysql_token) + + # insert static data + sudo('mysql -u root -p%s -e "use cmon; insert into cluster(type) VALUES (\'galera\')"' % mysql_token) + + host_list = galera_ip_list + ['localhost', '127.0.0.1', internal_vip] + # Create cmon user + for host in host_list: + mysql_cmon_user_cmd = 'mysql -u root -p%s -e "CREATE USER \'cmon\'@\'%s\' IDENTIFIED BY \'cmon\'"' % ( + mysql_token, host) + with settings(hide('everything'),warn_only=True): + sudo(mysql_cmon_user_cmd) + + mysql_cmd = "mysql -uroot -p%s -e" % mysql_token + # Grant privilages for cmon user. 
+ for host in host_list: + sudo('%s "GRANT ALL PRIVILEGES on *.* TO cmon@%s IDENTIFIED BY \'cmon\' WITH GRANT OPTION"' % + (mysql_cmd, host)) - # Create cmon schema - sudo('mysql -u root -p%s -e "CREATE SCHEMA IF NOT EXISTS cmon"' % mysql_token) - sudo('mysql -u root -p%s < /usr/local/cmon/share/cmon/cmon_db.sql' % mysql_token) - sudo('mysql -u root -p%s < /usr/local/cmon/share/cmon/cmon_data.sql' % mysql_token) - # insert static data - sudo('mysql -u root -p%s -e "use cmon; insert into cluster(type) VALUES (\'galera\')"' % mysql_token) +@task +@roles('openstack') +def start_openstack(): + with settings(warn_only=True): + sudo('service supervisor-openstack start') - host_list = galera_ip_list + ['localhost', '127.0.0.1', internal_vip] - # Create cmon user - for host in host_list: - mysql_cmon_user_cmd = 'mysql -u root -p%s -e "CREATE USER \'cmon\'@\'%s\' IDENTIFIED BY \'cmon\'"' % ( - mysql_token, host) - with settings(hide('everything'),warn_only=True): - sudo(mysql_cmon_user_cmd) +@task +@roles('build') +def join_orchestrator(new_ctrl_host): + orch = get_orchestrator() + if orch == 'openstack': + execute('increase_ulimits_node', new_ctrl_host) + execute('setup_openstack_node', new_ctrl_host) + if is_package_installed('contrail-openstack-dashboard'): + execute('setup_contrail_horizon_node', new_ctrl_host) + if get_openstack_internal_vip(): + execute('sync_keystone_ssl_certs_node', new_ctrl_host) + execute('setup_cluster_monitors_node', new_ctrl_host) + with settings(host_string = new_ctrl_host): + sudo('service supervisor-openstack restart') + execute('verify_openstack') - mysql_cmd = "mysql -uroot -p%s -e" % mysql_token - # Grant privilages for cmon user. - for host in host_list: - sudo('%s "GRANT ALL PRIVILEGES on *.* TO cmon@%s IDENTIFIED BY \'cmon\' WITH GRANT OPTION"' % - (mysql_cmd, host)) +@task +@roles('build') +def join_ha_cluster(new_ctrl_host): + execute('pre_check') + if get_contrail_internal_vip(): + print "Contrail HA setup - Adding %s to existing \ + cluster", new_ctrl_host + execute('join_keepalived_cluster', new_ctrl_host) + execute('fixup_restart_haproxy_in_collector') + if get_openstack_internal_vip(): + if new_ctrl_host in env.roledefs['openstack']: + print "Multi Openstack setup, Adding %s to the existing \ + OpenStack cluster", new_ctrl_host + execute('join_galera_cluster', new_ctrl_host) + execute('setup_cmon_schema_node', new_ctrl_host) + execute('fix_restart_xinetd_conf_node', new_ctrl_host) + execute('fixup_restart_haproxy_in_openstack') + execute('start_openstack') + execute('fix_memcache_conf_node', new_ctrl_host) + execute('tune_tcp_node', new_ctrl_host) + execute('fix_cmon_param_and_add_keys_to_compute') + execute('create_and_copy_service_token') + execute('join_rabbitmq_cluster', new_ctrl_host) + execute('increase_limits_node', new_ctrl_host) + execute('join_orchestrator', new_ctrl_host) + + if new_ctrl_host in env.roledefs['database']: + execute('setup_database_node', new_ctrl_host) + execute('fix_zookeeper_config') + execute('restart_all_zookeeper_servers') + + if new_ctrl_host in env.roledefs['cfgm']: + execute('setup_cfgm_node', new_ctrl_host) + execute('verify_cfgm') + execute('fix_cfgm_config') + + if new_ctrl_host in env.roledefs['control']: + execute('setup_control_node', new_ctrl_host) + execute('verify_control') + + if new_ctrl_host in env.roledefs['collector']: + execute('setup_collector_node', new_ctrl_host) + execute('fix_collector_config') + execute('verify_collector') + + if new_ctrl_host in env.roledefs['webui']: + execute('setup_webui_node', 
new_ctrl_host) + execute('fix_webui_config') + execute('verify_webui') + + if new_ctrl_host in env.roledefs['cfgm']: + execute('prov_config_node', new_ctrl_host) + execute('prov_metadata_services') + execute('prov_encap_type') + + if new_ctrl_host in env.roledefs['database']: + execute('prov_database_node', new_ctrl_host) + + if new_ctrl_host in env.roledefs['analytics']: + execute('prov_analytics_node', new_ctrl_host) + + if new_ctrl_host in env.roledefs['control']: + execute('prov_control_bgp') + execute('prov_external_bgp') + + execute('setup_remote_syslog') + @task @roles('build') def setup_ha(): diff --git a/fabfile/tasks/helpers.py b/fabfile/tasks/helpers.py index d1c66b3d3..00d2fe107 100644 --- a/fabfile/tasks/helpers.py +++ b/fabfile/tasks/helpers.py @@ -251,11 +251,11 @@ def check_ssh(): if not verify_sshd(hostip, user, password): sshd_down_hosts += "%s : %s\n" % (host_string, password) - if sshd_down_hosts: + if sshd_down_hosts: raise Exception("Following list of hosts are down: \n %s" % sshd_down_hosts) else: print "\n\tAll nodes are Up." - + @roles('all') @task def all_command(command): @@ -320,7 +320,7 @@ def compute_provision(): tgt_ip = env.host_string.split('@')[1] tgt_hostname = sudo("hostname") prov_args = "--host_name %s --host_ip %s --api_server_ip %s --oper add " \ - %(tgt_hostname, tgt_ip, cfgm_ip) + %(tgt_hostname, tgt_ip, cfgm_ip) sudo("/opt/contrail/utils/provision_vrouter.py %s" %(prov_args)) @@ -603,8 +603,8 @@ def virsh_cleanup(): sudo('virsh destroy %s' %(inst_name)) sudo('virsh undefine %s' %(inst_name)) sudo('rm -rf /var/lib/nova/instances/%s' %(inst_name)) - -#end virsh_cleanup + +#end virsh_cleanup @task def virsh_cmd(cmd): result = sudo('virsh %s' %(cmd)) @@ -653,7 +653,7 @@ def cleanup_os_config(): for db in dbs: sudo('mysql -u root --password=%s -e \'drop database %s;\'' %(token, db)) - + if detect_ostype() == 'ubuntu': services = ubuntu_services for service in services : @@ -662,14 +662,14 @@ def cleanup_os_config(): sudo('sudo rm -f /etc/contrail/mysql.token') sudo('sudo rm -f /etc/contrail/service.token') sudo('sudo rm -f /etc/contrail/keystonerc') - - #TODO + + #TODO # In Ubuntu, by default glance uses sqlite # Until we have a clean way of clearing glance image-data in sqlite, # just skip removing the images on Ubuntu if not detect_ostype() in ['ubuntu']: sudo('sudo rm -f /var/lib/glance/images/*') - + sudo('sudo rm -rf /var/lib/nova/tmp/nova-iptables') sudo('sudo rm -rf /var/lib/libvirt/qemu/instance*') sudo('sudo rm -rf /var/log/libvirt/qemu/instance*') @@ -678,7 +678,7 @@ def cleanup_os_config(): sudo('sudo rm -rf /var/log/libvirt/qemu/inst*') sudo('sudo rm -rf /etc/libvirt/qemu/inst*') sudo('sudo rm -rf /var/lib/nova/instances/_base/*') - + if detect_ostype() in ['ubuntu'] and env.host_string in env.roledefs['openstack']: sudo('mysql_install_db --user=mysql --ldata=/var/lib/mysql/') #end cleanup_os_config @@ -686,13 +686,13 @@ def cleanup_os_config(): @roles('build') @task def config_server_reset(option=None, hosts=[]): - + for host_string in hosts: api_config_file = '/etc/contrail/supervisord_config_files/contrail-api.ini' disc_config_file = '/etc/contrail/supervisord_config_files/contrail-discovery.ini' schema_config_file = '/etc/contrail/supervisord_config_files/contrail-schema.ini' svc_m_config_file = '/etc/contrail/supervisord_config_files/contrail-svc-monitor.ini' - + with settings(host_string=host_string): try : if option == "add" : @@ -793,7 +793,7 @@ def enable_haproxy(): if detect_ostype() == 'ubuntu': with settings(warn_only=True): 
sudo("sudo sed -i 's/ENABLED=.*/ENABLED=1/g' /etc/default/haproxy") -#end enable_haproxy +#end enable_haproxy def qpidd_changes_for_ubuntu(): '''Qpidd.conf changes for Ubuntu @@ -821,7 +821,7 @@ def is_pingable(host_string, negate=False, maxwait=900): res = sudo('ping -q -w 2 -c 1 %s' %hostip) except: res = runouput(return_code=1) - + if res.return_code == 0 and negate == 'False': print 'Host (%s) is Pingable' break @@ -940,9 +940,27 @@ def increase_ulimits(): ''' Increase ulimit in /etc/init.d/mysqld /etc/init/mysql.conf /etc/init.d/rabbitmq-server files ''' + execute('increase_ulimits_node', env.host_string) + +@task +def increase_ulimits_node(*args): + for host_string in args: + with settings(host_string=host_string, warn_only = True): + if detect_ostype() == 'ubuntu': + sudo("sed -i '/start|stop)/ a\ ulimit -n 10240' /etc/init.d/mysql") + sudo("sed -i '/start_rabbitmq () {/a\ ulimit -n 10240' /etc/init.d/rabbitmq-server") + sudo("sed -i '/umask 007/ a\limit nofile 10240 10240' /etc/init/mysql.conf") + sudo("sed -i '/\[mysqld\]/a\max_connections = 10000' /etc/mysql/my.cnf") + sudo("echo 'ulimit -n 10240' >> /etc/default/rabbitmq-server") + else: + sudo("sed -i '/start(){/ a\ ulimit -n 10240' /etc/init.d/mysqld") + sudo("sed -i '/start_rabbitmq () {/a\ ulimit -n 10240' /etc/init.d/rabbitmq-server") + sudo("sed -i '/\[mysqld\]/a\max_connections = 2048' /etc/my.cnf") + + with settings(warn_only = True): if detect_ostype() == 'ubuntu': - sudo("sed -i '/start|stop)/ a\ ulimit -n 10240' /etc/init.d/mysql") + sudo("sed -i '/start|stop)/ a\ ulimit -n 10240' /etc/init.d/mysql") sudo("sed -i '/start_rabbitmq () {/a\ ulimit -n 10240' /etc/init.d/rabbitmq-server") sudo("sed -i '/umask 007/ a\limit nofile 10240 10240' /etc/init/mysql.conf") sudo("sed -i '/\[mysqld\]/a\max_connections = 10000' /etc/mysql/my.cnf") @@ -958,6 +976,44 @@ def increase_limits(): ''' Increase limits in /etc/security/limits.conf, sysctl.conf and /etc/contrail/supervisor*.conf files ''' + execute('increase_limits_node', env.host_string) + +@task +def increase_limits_node(*args): + for host_string in args: + limits_conf = '/etc/security/limits.conf' + with settings(host_string=host_string, warn_only=True): + pattern='^root\s*soft\s*nproc\s*.*' + if detect_ostype() in ['ubuntu']: + line = 'root soft nofile 65535\nroot hard nofile 65535' + else: + line = 'root soft nproc 65535' + insert_line_to_file(pattern = pattern, line = line,file_name = limits_conf) + + pattern='^*\s*hard\s*nofile\s*.*' + line = '* hard nofile 65535' + insert_line_to_file(pattern = pattern, line = line,file_name = limits_conf) + + pattern='^*\s*soft\s*nofile\s*.*' + line = '* soft nofile 65535' + insert_line_to_file(pattern = pattern, line = line,file_name = limits_conf) + + pattern='^*\s*hard\s*nproc\s*.*' + line = '* hard nproc 65535' + insert_line_to_file(pattern = pattern, line = line,file_name = limits_conf) + + pattern='^*\s*soft\s*nproc\s*.*' + line = '* soft nofile 65535' + insert_line_to_file(pattern = pattern, line = line,file_name = limits_conf) + + sysctl_conf = '/etc/sysctl.conf' + insert_line_to_file(pattern = '^fs.file-max.*', + line = 'fs.file-max = 65535',file_name = sysctl_conf) + sudo('sysctl -p') + + sudo('sed -i \'s/^minfds.*/minfds=10240/\' /etc/contrail/supervisor*.conf') + + limits_conf = '/etc/security/limits.conf' with settings(warn_only = True): pattern='^root\s*soft\s*nproc\s*.*' @@ -1058,7 +1114,7 @@ def validate_hosts(): all_hostnames = env.hostnames['all'] current_hostlist = {} current_hosttimes = {} - + # Check if the 
hostnames on the nodes are as mentioned in testbed file for host in env.roledefs['all']: with settings(host_string = host): @@ -1074,15 +1130,15 @@ def validate_hosts(): print "They are %s and %s" %(hstr_to_ip(host), hstr_to_ip(current_hostlist[curr_hostname])) print "Please fix them before continuing!! " exit(1) - + #Check if env.hostnames['all'] has any spurious entries if set(current_hostlist.keys()) != set(env.hostnames['all']): print "hostnames['all'] in testbed file does not seem to be correct" print "Expected : %s" %(current_hostlist) - print "Seen : %s" %(env.hostnames['all']) + print "Seen : %s" %(env.hostnames['all']) exit(1) print "All hostnames are unique and defined in testbed correctly..OK" - + #Check if date/time on the hosts are almost the same (diff < 5min) for host in env.roledefs['all']: with settings(host_string = host): @@ -1093,16 +1149,16 @@ def validate_hosts(): host, datetime.datetime.fromtimestamp(avg_time), datetime.datetime.fromtimestamp(float(current_hosttimes[host]))) - if abs(avg_time - int(current_hosttimes[host])) > 300 : + if abs(avg_time - int(current_hosttimes[host])) > 300 : print "Time of Host % seems to be not in sync with rest of the hosts" %(host) print "Please make sure that the date and time on all hosts are in sync before continuning!!" exit(1) print "Date and time on all hosts are in sync..OK" - + # Check if all hosts are reachable by each other using their hostnames execute(full_mesh_ping_by_name) - + @task @roles('openstack') @@ -1179,7 +1235,7 @@ def pre_check(): print "\tHowever minimum disk space for database node is 256GB." exit(1) database_nodes = deepcopy(env.roledefs['database']) - if (len(database_nodes) % 2) != 1: + if (len(database_nodes) == 2): print "\nERROR: \n\tRecommended to deploy odd number of zookeeper(database) nodes." print "\tAdd/remove a node to/from the existing clusters testbed.py and continue." 
exit(1) @@ -1269,15 +1325,20 @@ def set_allow_unsupported_sfp(): @task @roles('all') def setup_common(): - with settings(warn_only=True): - ntp_server = get_ntp_server() - if ntp_server is not None and\ - exists('/etc/ntp.conf'): - ntp_chk_cmd = 'grep "server ' + ntp_server + '" /etc/ntp.conf' - ntp_chk_cmd_out = sudo(ntp_chk_cmd) - if ntp_chk_cmd_out == "": - ntp_cmd = 'echo "server ' + ntp_server + '" >> /etc/ntp.conf' - sudo(ntp_cmd) + execute("setup_common_node", env.host_string) + +@task +def setup_common_node(*args): + for host_string in args: + with settings(host_string=host_string, warn_only=True): + ntp_server = get_ntp_server() + if ntp_server is not None and\ + exists('/etc/ntp.conf'): + ntp_chk_cmd = 'grep "server ' + ntp_server + '" /etc/ntp.conf' + ntp_chk_cmd_out = sudo(ntp_chk_cmd) + if ntp_chk_cmd_out == "": + ntp_cmd = 'echo "server ' + ntp_server + '" >> /etc/ntp.conf' + sudo(ntp_cmd) @task @roles('build') @@ -1374,7 +1435,7 @@ def all_sm_reimage_status(attempts=180, interval=10, node=None, contrail_role='a sys.stdout.write('%s :: %s -> %s\n' % (node, node_status_save[node], node_status[node])) node_status_save[node]=node_status[node] else: - if (node_status_save[node] != node_status[node] and + if (node_status_save[node] != node_status[node] and node_status_save[node] != "initial_state"): sys.stdout.write('%s :: %s -> %s\n' % (node, node_status_save[node], node_status[node])) node_status_save[node]=node_status[node] diff --git a/fabfile/tasks/install.py b/fabfile/tasks/install.py index 0dbc7d6c0..fee71c050 100644 --- a/fabfile/tasks/install.py +++ b/fabfile/tasks/install.py @@ -752,6 +752,7 @@ def reboot_on_kernel_update_without_openstack(reboot='True'): 'Reboot manually before setup to avoid misconfiguration!' % node else: print '[%s]: Node is already booted with new kernel' % node + @task @roles('build') def reboot_on_kernel_update(reboot='True'): @@ -794,6 +795,39 @@ def reboot_on_kernel_update(reboot='True'): else: print '[%s]: Node is already booted with new kernel' % node + +@roles('build') +@task +def install_new_contrail(**kwargs): + """Installs required contrail packages in all nodes as per the role definition. 
+ """ + new_host = kwargs.get('new_ctrl') + execute('pre_check') + execute(create_install_repo_node, new_host) + + if new_host in env.roledefs['database']: + execute(install_database_node, True, new_host) + if (get_orchestrator() is 'openstack'): + if new_host in env.roledefs['openstack']: + execute("install_openstack_node", new_host) + else: + print "ERROR: Only adding a new Openstack controller is supported" + return + + if new_host in env.roledefs['cfgm']: + execute(install_cfgm_node, new_host) + + if new_host in env.roledefs['control']: + execute(install_control_node, new_host) + + if new_host in env.roledefs['collector']: + execute(install_collector_node, new_host) + + if new_host in env.roledefs['webui']: + execute(install_webui_node, new_host) + + execute(upgrade_pkgs_node, new_host) + @roles('build') @task def install_contrail(*tgzs, **kwargs): diff --git a/fabfile/tasks/provision.py b/fabfile/tasks/provision.py index 06d765f24..e32164d68 100644 --- a/fabfile/tasks/provision.py +++ b/fabfile/tasks/provision.py @@ -50,6 +50,48 @@ def setup_cfgm(): if env.roledefs['cfgm']: execute("setup_cfgm_node", env.host_string) +@roles('cfgm') +@task +def fix_cfgm_config(): + """Regenerate the config file in all the cfgm nodes""" + if env.roledefs['cfgm']: + execute("fix_cfgm_config_node", env.host_string) + +@task +def fix_cfgm_config_node(*args): + for host_string in args: + with settings(host_string = host_string): + cmd = frame_vnc_config_cmd(host_string, "update-cfgm-config") + sudo(cmd) + +@roles('collector') +@task +def fix_collector_config(): + """Regenerate the collector file in all the analytics nodes""" + if env.roledefs['collector']: + execute("fix_collector_config_node", env.host_string) + +@task +def fix_collector_config_node(*args): + for host_string in args: + with settings(host_string = host_string): + cmd = frame_vnc_collector_cmd(host_string, "update-collector-config") + sudo(cmd) + +@roles('webui') +@task +def fix_webui_config(): + """Regenerate the webui config file in all the webui nodes""" + if env.roledefs['webui']: + execute("fix_webui_config_node", env.host_string) + +@task +def fix_webui_config_node(*args): + for host_string in args: + with settings(host_string = host_string): + cmd = frame_vnc_webui_cmd(host_string, "update-webui-config") + sudo(cmd) + def fixup_restart_haproxy_in_all_cfgm(nworkers): template = string.Template(""" #contrail-config-marker-start @@ -980,7 +1022,6 @@ def setup_image_service_node(*args): @roles('openstack') def setup_openstack(): """Provisions openstack services in all nodes defined in openstack role.""" - execute('add_openstack_reserverd_ports') if env.roledefs['openstack']: execute("setup_openstack_node", env.host_string) # Blindly run setup_openstack twice for Ubuntu @@ -1035,7 +1076,8 @@ def setup_nova_aggregate_node(*args): continue if sudo("(source /etc/contrail/openstackrc; nova aggregate-add-host %s %s)" % (hypervisor, host_name)).failed: continue # Services might be starting up after reboot - break # Stop retrying as the aggregate is created and compute is added. + break # Stop retrying as the aggregate is created and compute is added + @roles('openstack') @task @@ -1048,7 +1090,7 @@ def setup_contrail_horizon(): def setup_openstack_node(*args): """Provisions openstack services in one or list of nodes. 
USAGE: fab setup_openstack_node:user@1.1.1.1,user@2.2.2.2""" #qpidd_changes_for_ubuntu() - + execute('add_openstack_reserverd_ports') for host_string in args: # Frame the command line to provision openstack cmd = frame_vnc_openstack_cmd(host_string) @@ -1169,6 +1211,7 @@ def setup_redis_server_node(*args): sudo("service %s restart" % (redis_svc_name)) #end setup_redis_server_node + @task def setup_collector_node(*args): """Provisions collector services in one or list of nodes. USAGE: fab setup_collector_node:user@1.1.1.1,user@2.2.2.2""" @@ -1245,6 +1288,33 @@ def setup_database_node(*args): sudo(cmd) #end setup_database +@task +@roles('database') +def fix_zookeeper_config(): + """Update the zookeeper config based on the new configuration""" + if env.roledefs['database']: + execute("fix_zookeeper_config_node", env.host_string) + +@task +def fix_zookeeper_config_node(*args): + for host_string in args: + cmd = frame_vnc_database_cmd(host_string, 'update-zoo-servers') + sudo(cmd) + +@task +@roles('database') +def restart_all_zookeeper_servers(): + """Restarts all zookeeper server in all the database nodes""" + if env.roledefs['database']: + execute("restart_all_zookeeper_servers_node", env.host_string) + +@task +def restart_all_zookeeper_servers_node(*args): + for host_string in args: + cmd = frame_vnc_database_cmd(host_string, 'restart-zoo-server') + sudo(cmd) + + @task @roles('webui') def setup_webui(): @@ -1252,6 +1322,7 @@ def setup_webui(): if env.roledefs['webui']: execute("setup_webui_node", env.host_string) + @task def setup_webui_node(*args): """Provisions webui services in one or list of nodes. USAGE: fab setup_webui_node:user@1.1.1.1,user@2.2.2.2""" @@ -1426,154 +1497,194 @@ def setup_only_vrouter_node(manage_nova_compute='yes', configure_nova='yes', *ar @task @EXECUTE_TASK -@roles('cfgm') -def prov_config_node(): - cfgm_ip = hstr_to_ip(get_control_host_string(env.roledefs['cfgm'][0])) - tgt_ip = hstr_to_ip(get_control_host_string(env.host_string)) - tgt_hostname = sudo("hostname") - - with cd(UTILS_DIR): - cmd = "python provision_config_node.py" - cmd += " --api_server_ip %s" % cfgm_ip - cmd += " --host_name %s" % tgt_hostname - cmd += " --host_ip %s" % tgt_ip - cmd += " --oper add" - cmd += " %s" % get_mt_opts() - sudo(cmd) +def prov_config(): + execute("prov_config_node", env.host_string) + +@task +def prov_config_node(*args): + for host_string in args: + with settings(host_string = host_string): + cfgm_ip = hstr_to_ip(get_control_host_string(env.roledefs['cfgm'][0])) + tgt_ip = hstr_to_ip(get_control_host_string(env.host_string)) + tgt_hostname = sudo("hostname") + + with cd(UTILS_DIR): + cmd = "python provision_config_node.py" + cmd += " --api_server_ip %s" % cfgm_ip + cmd += " --host_name %s" % tgt_hostname + cmd += " --host_ip %s" % tgt_ip + cmd += " --oper add" + cmd += " %s" % get_mt_opts() + sudo(cmd) #end prov_config_node @task @EXECUTE_TASK @roles('database') -def prov_database_node(): - cfgm_host = env.roledefs['cfgm'][0] - cfgm_ip = hstr_to_ip(get_control_host_string(cfgm_host)) - cfgm_host_password = get_env_passwords(env.roledefs['cfgm'][0]) - tgt_ip = hstr_to_ip(get_control_host_string(env.host_string)) - tgt_hostname = sudo("hostname") - - with settings(cd(UTILS_DIR), host_string=cfgm_host, - password=cfgm_host_password): - cmd = "python provision_database_node.py" - cmd += " --api_server_ip %s" % cfgm_ip - cmd += " --host_name %s" % tgt_hostname - cmd += " --host_ip %s" % tgt_ip - cmd += " --oper add" - cmd += " %s" % get_mt_opts() - sudo(cmd) +def 
prov_database(): + execute("prov_database_node", env.host_string) + +@task +def prov_database_node(*args): + for host_string in args: + with settings(host_string = host_string): + cfgm_host = env.roledefs['cfgm'][0] + cfgm_ip = hstr_to_ip(get_control_host_string(cfgm_host)) + cfgm_host_password = get_env_passwords(env.roledefs['cfgm'][0]) + tgt_ip = hstr_to_ip(get_control_host_string(env.host_string)) + tgt_hostname = sudo("hostname") + + with settings(cd(UTILS_DIR), host_string=cfgm_host, + password=cfgm_host_password): + cmd = "python provision_database_node.py" + cmd += " --api_server_ip %s" % cfgm_ip + cmd += " --host_name %s" % tgt_hostname + cmd += " --host_ip %s" % tgt_ip + cmd += " --oper add" + cmd += " %s" % get_mt_opts() + sudo(cmd) #end prov_database_node @task @EXECUTE_TASK @roles('collector') -def prov_analytics_node(): - cfgm_host = env.roledefs['cfgm'][0] - cfgm_ip = hstr_to_ip(get_control_host_string(cfgm_host)) - cfgm_host_password = get_env_passwords(env.roledefs['cfgm'][0]) - tgt_ip = hstr_to_ip(get_control_host_string(env.host_string)) - tgt_hostname = sudo("hostname") - - with settings(cd(UTILS_DIR), host_string=cfgm_host, - password=cfgm_host_password): - cmd = "python provision_analytics_node.py" - cmd += " --api_server_ip %s" % cfgm_ip - cmd += " --host_name %s" % tgt_hostname - cmd += " --host_ip %s" % tgt_ip - cmd += " --oper add" - cmd += " %s" % get_mt_opts() - sudo(cmd) +def prov_analytics(): + execute("prov_analytics_node", env.host_string) + +@task +def prov_analytics_node(*args): + for host_string in args: + with settings(host_string = host_string): + cfgm_host = env.roledefs['cfgm'][0] + cfgm_ip = hstr_to_ip(get_control_host_string(cfgm_host)) + cfgm_host_password = get_env_passwords(env.roledefs['cfgm'][0]) + tgt_ip = hstr_to_ip(get_control_host_string(env.host_string)) + tgt_hostname = sudo("hostname") + + with settings(cd(UTILS_DIR), host_string=cfgm_host, + password=cfgm_host_password): + cmd = "python provision_analytics_node.py" + cmd += " --api_server_ip %s" % cfgm_ip + cmd += " --host_name %s" % tgt_hostname + cmd += " --host_ip %s" % tgt_ip + cmd += " --oper add" + cmd += " %s" % get_mt_opts() + sudo(cmd) #end prov_analytics_node @task @EXECUTE_TASK @roles('control') def prov_control_bgp(): - cfgm_host = env.roledefs['cfgm'][0] - cfgm_ip = hstr_to_ip(get_control_host_string(cfgm_host)) - cfgm_host_password = get_env_passwords(env.roledefs['cfgm'][0]) - tgt_ip = hstr_to_ip(get_control_host_string(env.host_string)) - tgt_hostname = sudo("hostname") - - with settings(cd(UTILS_DIR), host_string=cfgm_host, - password=cfgm_host_password): - print "Configuring global system config with the ASN" - cmd = "python provision_control.py" - cmd += " --api_server_ip %s" % cfgm_ip - cmd += " --api_server_port 8082" - cmd += " --router_asn %s" % testbed.router_asn - md5_value = get_bgp_md5(env.host_string) - #if condition required because without it, it will configure literal 'None' md5 key - if md5_value: - cmd += " --md5 %s" % md5_value - cmd += " %s" % get_mt_opts() - sudo(cmd) - print "Adding control node as bgp router" - cmd += " --host_name %s" % tgt_hostname - cmd += " --host_ip %s" % tgt_ip - cmd += " --oper add" - sudo(cmd) + execute("prov_control_bgp_node", env.host_string) + +@task +def prov_control_bgp_node(*args): + for host_string in args: + with settings(host_string = host_string): + cfgm_host = env.roledefs['cfgm'][0] + cfgm_ip = hstr_to_ip(get_control_host_string(cfgm_host)) + cfgm_host_password = get_env_passwords(env.roledefs['cfgm'][0]) + 
tgt_ip = hstr_to_ip(get_control_host_string(env.host_string)) + tgt_hostname = sudo("hostname") + + with settings(cd(UTILS_DIR), host_string=cfgm_host, + password=cfgm_host_password): + print "Configuring global system config with the ASN" + cmd = "python provision_control.py" + cmd += " --api_server_ip %s" % cfgm_ip + cmd += " --api_server_port 8082" + cmd += " --router_asn %s" % testbed.router_asn + md5_value = get_bgp_md5(env.host_string) + #if condition required because without it, it will configure literal 'None' md5 key + if md5_value: + cmd += " --md5 %s" % md5_value + cmd += " %s" % get_mt_opts() + sudo(cmd) + print "Adding control node as bgp router" + cmd += " --host_name %s" % tgt_hostname + cmd += " --host_ip %s" % tgt_ip + cmd += " --oper add" + sudo(cmd) #end prov_control_bgp @roles('cfgm') @task def prov_external_bgp(): - cfgm_ip = hstr_to_ip(get_control_host_string(env.roledefs['cfgm'][0])) + execute("prov_external_bgp_node", env.host_string) - for ext_bgp in testbed.ext_routers: - ext_bgp_name = ext_bgp[0] - ext_bgp_ip = ext_bgp[1] - with cd(UTILS_DIR): - cmd = "python provision_mx.py" - cmd += " --api_server_ip %s" % cfgm_ip - cmd += " --api_server_port 8082" - cmd += " --router_name %s" % ext_bgp_name - cmd += " --router_ip %s" % ext_bgp_ip - cmd += " --router_asn %s" % testbed.router_asn - cmd += " %s" % get_mt_opts() - sudo(cmd) +@task +def prov_external_bgp_node(*args): + for host_string in args: + with settings(host_string = host_string): + cfgm_ip = hstr_to_ip(get_control_host_string(env.roledefs['cfgm'][0])) + for ext_bgp in testbed.ext_routers: + ext_bgp_name = ext_bgp[0] + ext_bgp_ip = ext_bgp[1] + with cd(UTILS_DIR): + cmd = "python provision_mx.py" + cmd += " --api_server_ip %s" % cfgm_ip + cmd += " --api_server_port 8082" + cmd += " --router_name %s" % ext_bgp_name + cmd += " --router_ip %s" % ext_bgp_ip + cmd += " --router_asn %s" % testbed.router_asn + cmd += " %s" % get_mt_opts() + sudo(cmd) #end prov_control_bgp @roles('cfgm') @task def prov_metadata_services(): - cfgm_ip = get_contrail_internal_vip() or hstr_to_ip(get_control_host_string(env.roledefs['cfgm'][0])) - orch = get_orchestrator() - if orch is 'none': - return + execute("prov_metadata_services_node", env.host_string) - if orch is 'openstack': - openstack_host = get_control_host_string(env.roledefs['openstack'][0]) - ipfabric_service_ip = get_openstack_internal_vip() or hstr_to_ip(openstack_host) - ipfabric_service_port = '8775' - elif orch is 'vcenter': - ipfabric_service_ip = get_authserver_ip() - ipfabric_service_port = get_authserver_port() - admin_user, admin_password = get_authserver_credentials() - metadata_args = "--admin_user %s" % admin_user - metadata_args += " --admin_password %s" % admin_password - metadata_args += " --ipfabric_service_ip %s" % ipfabric_service_ip - metadata_args += " --api_server_ip %s" % cfgm_ip - metadata_args += " --linklocal_service_name metadata" - metadata_args += " --linklocal_service_ip 169.254.169.254" - metadata_args += " --linklocal_service_port 80" - metadata_args += " --ipfabric_service_port %s" % ipfabric_service_port - metadata_args += " --oper add" - sudo("python /opt/contrail/utils/provision_linklocal.py %s" % metadata_args) +@task +def prov_metadata_services_node(*args): + for host_string in args: + with settings(host_string = host_string): + cfgm_ip = get_contrail_internal_vip() or hstr_to_ip(get_control_host_string(env.roledefs['cfgm'][0])) + orch = get_orchestrator() + if orch is 'none': + return + + if orch is 'openstack': + openstack_host = 
get_control_host_string(env.roledefs['openstack'][0]) + ipfabric_service_ip = get_openstack_internal_vip() or hstr_to_ip(openstack_host) + ipfabric_service_port = '8775' + elif orch is 'vcenter': + ipfabric_service_ip = get_authserver_ip() + ipfabric_service_port = get_authserver_port() + admin_user, admin_password = get_authserver_credentials() + metadata_args = "--admin_user %s" % admin_user + metadata_args += " --admin_password %s" % admin_password + metadata_args += " --ipfabric_service_ip %s" % ipfabric_service_ip + metadata_args += " --api_server_ip %s" % cfgm_ip + metadata_args += " --linklocal_service_name metadata" + metadata_args += " --linklocal_service_ip 169.254.169.254" + metadata_args += " --linklocal_service_port 80" + metadata_args += " --ipfabric_service_port %s" % ipfabric_service_port + metadata_args += " --oper add" + sudo("python /opt/contrail/utils/provision_linklocal.py %s" % metadata_args) #end prov_metadata_services @roles('cfgm') @task def prov_encap_type(): - cfgm_ip = hstr_to_ip(get_control_host_string(env.roledefs['cfgm'][0])) - admin_user, admin_password = get_authserver_credentials() - if 'encap_priority' not in env.keys(): - env.encap_priority="MPLSoUDP,MPLSoGRE,VXLAN" - encap_args = "--admin_user %s" % admin_user - encap_args += " --admin_password %s" % admin_password - encap_args += " --encap_priority %s" % env.encap_priority - encap_args += " --oper add" - sudo("python /opt/contrail/utils/provision_encap.py %s" % encap_args) - sleep(10) + execute("prov_encap_type_node", env.host_string) + +@task +def prov_encap_type_node(*args): + for host_string in args: + with settings(host_string = host_string): + cfgm_ip = hstr_to_ip(get_control_host_string(env.roledefs['cfgm'][0])) + admin_user, admin_password = get_authserver_credentials() + if 'encap_priority' not in env.keys(): + env.encap_priority="MPLSoUDP,MPLSoGRE,VXLAN" + encap_args = "--admin_user %s" % admin_user + encap_args += " --admin_password %s" % admin_password + encap_args += " --encap_priority %s" % env.encap_priority + encap_args += " --oper add" + sudo("python /opt/contrail/utils/provision_encap.py %s" % encap_args) + sleep(10) #end prov_encap_type @task @@ -2109,6 +2220,15 @@ def setup_orchestrator(): #elif orch == 'vcenter': #execute('setup_vcenter') + +@roles('build') +@task +def join_cluster(new_ctrl_ip): + """Provisions required contrail services in the node as per the role definition. + """ + execute('setup_common_node', new_ctrl_ip) + execute('join_ha_cluster', new_ctrl_ip) + @roles('build') @task def setup_all(reboot='True'): @@ -2176,9 +2296,9 @@ def setup_without_openstack(manage_nova_compute='yes', reboot='True'): execute('setup_webui') execute('verify_webui') execute('setup_vrouter', manage_nova_compute) - execute('prov_config_node') - execute('prov_database_node') - execute('prov_analytics_node') + execute('prov_config') + execute('prov_database') + execute('prov_analytics') execute('prov_control_bgp') execute('prov_external_bgp') execute('prov_metadata_services') @@ -2186,7 +2306,6 @@ def setup_without_openstack(manage_nova_compute='yes', reboot='True'): execute('setup_remote_syslog') execute('add_tsn', restart=False) execute('add_tor_agent', restart=False) - execute('increase_vrouter_limit') if reboot == 'True': print "Rebooting the compute nodes after setup all." 
execute(compute_reboot) @@ -2218,9 +2337,9 @@ def setup_contrail_analytics_components(manage_nova_compute='no', reboot='False' execute('verify_collector') execute('setup_webui') execute('verify_webui') - execute('prov_config_node') - execute('prov_database_node') - execute('prov_analytics_node') + execute('prov_config') + execute('prov_database') + execute('prov_analytics') execute('setup_remote_syslog') @roles('build') diff --git a/fabfile/tasks/rabbitmq.py b/fabfile/tasks/rabbitmq.py index b6e848128..a4b4148e5 100644 --- a/fabfile/tasks/rabbitmq.py +++ b/fabfile/tasks/rabbitmq.py @@ -67,23 +67,34 @@ def set_tcp_keepalive_on_compute(): @EXECUTE_TASK @roles('rabbit') def listen_at_supervisor_support_port(): - with settings(warn_only=True): - if sudo("service supervisor-support-service status | grep running").failed: - sudo("service supervisor-support-service start") - sudo("supervisorctl -s unix:///tmp/supervisord_support_service.sock stop all") + execute('listen_at_supervisor_support_port_node', env.host_string) + +@task +def listen_at_supervisor_support_port_node(*args): + for host_string in args: + with settings(host_string=host_string, warn_only=True): + if sudo("service supervisor-support-service status | grep running").failed: + sudo("service supervisor-support-service start") + sudo("supervisorctl -s unix:///tmp/supervisord_support_service.sock stop all") + @task @EXECUTE_TASK @roles('rabbit') def remove_mnesia_database(): - with settings(warn_only=True): - sudo("service rabbitmq-server stop") - if 'Killed' not in sudo("epmd -kill"): - sudo("pkill -9 beam") - sudo("pkill -9 epmd") - if 'beam' in sudo("netstat -anp | grep beam"): - sudo("pkill -9 beam") - sudo("rm -rf /var/lib/rabbitmq/mnesia") + execute('remove_mnesia_database_node', env.host_string) + +@task +def remove_mnesia_database_node(*args): + for host_string in args: + with settings(host_string=host_string, warn_only=True): + sudo("service rabbitmq-server stop") + if 'Killed' not in sudo("epmd -kill"): + sudo("pkill -9 beam") + sudo("pkill -9 epmd") + if 'beam' in sudo("netstat -anp | grep beam"): + sudo("pkill -9 beam") + sudo("rm -rf /var/lib/rabbitmq/mnesia") @task @parallel @@ -99,8 +110,8 @@ def rabbitmq_env(): erl_node_name = None rabbit_env_conf = '/etc/rabbitmq/rabbitmq-env.conf' with settings(host_string=env.host_string, password=get_env_passwords(env.host_string)): - host_name = sudo('hostname -s') + ctrl - erl_node_name = "rabbit@%s" % (host_name) + host_name = sudo('hostname -s') + ctrl + erl_node_name = "rabbit@%s" % (host_name) rabbitmq_env_template = rabbitmq_env_conf rmq_env_conf = rabbitmq_env_template.template.safe_substitute({ '__erl_node_ip__' : hstr_to_ip(get_control_host_string(env.host_string)), @@ -145,30 +156,46 @@ def config_rabbitmq(): @parallel @roles('rabbit') def allow_rabbitmq_port(): - execute('disable_iptables') + execute('allow_rabbitmq_port_node', env.host_string) + +@task +def allow_rabbitmq_port_node(*args): + for host_string in args: + with settings(host_string=host_string, warn_only=True): + execute('disable_iptables') @task @parallel @roles('rabbit') def stop_rabbitmq_and_set_cookie(uuid): - with settings(warn_only=True): - sudo("service rabbitmq-server stop") - if 'Killed' not in sudo("epmd -kill"): - sudo("pkill -9 beam") - sudo("pkill -9 epmd") - if 'beam' in sudo("netstat -anp | grep beam"): - sudo("pkill -9 beam") - sudo("rm -rf /var/lib/rabbitmq/mnesia/") - sudo("echo '%s' > /var/lib/rabbitmq/.erlang.cookie" % uuid) - sudo("chmod 400 /var/lib/rabbitmq/.erlang.cookie") - 
sudo("chown rabbitmq:rabbitmq /var/lib/rabbitmq/.erlang.cookie") + execute('stop_rabbitmq_and_set_cookie_node', uuid, env.host_string) +@task +def stop_rabbitmq_and_set_cookie_node(uuid, *args): + for host_string in args: + with settings(host_string=host_string, warn_only=True): + sudo("service rabbitmq-server stop") + if 'Killed' not in sudo("epmd -kill"): + sudo("pkill -9 beam") + sudo("pkill -9 epmd") + if 'beam' in sudo("netstat -anp | grep beam"): + sudo("pkill -9 beam") + sudo("rm -rf /var/lib/rabbitmq/mnesia/") + sudo("echo '%s' > /var/lib/rabbitmq/.erlang.cookie" % uuid) + sudo("chmod 400 /var/lib/rabbitmq/.erlang.cookie") + sudo("chown rabbitmq:rabbitmq /var/lib/rabbitmq/.erlang.cookie") @task @serial @roles('rabbit') def start_rabbitmq(): - sudo("service rabbitmq-server restart") + execute('start_rabbitmq_node', env.host_string) + +@task +def start_rabbitmq_node(*args): + for host_string in args: + with settings(host_string=host_string, warn_only=True): + sudo("service rabbitmq-server restart") @task @parallel @@ -268,6 +295,52 @@ def verify_cluster_status(retry='yes'): def set_ha_policy_in_rabbitmq(): sudo("rabbitmqctl set_policy HA-all \"\" '{\"ha-mode\":\"all\",\"ha-sync-mode\":\"automatic\"}'") +@task +@roles('build') +def join_rabbitmq_cluster(new_ctrl_host): + """ Task to join a new rabbit server into an existing cluster """ + # Provision rabbitmq cluster in cfgm role nodes. + amqp_roles = ['cfgm'] + if get_from_testbed_dict('openstack', 'manage_amqp', 'no') == 'yes': + #Provision rabbitmq cluster in openstack role nodes aswell. + amqp_roles.append('openstack') + for role in amqp_roles: + env.roledefs['rabbit'] = env.roledefs[role] + + # copy the erlang cookie from one of the other nodes. + rabbitmq_cluster_uuid = None + for host_string in env.roledefs['rabbit']: + with settings(host_string=host_string, warn_only=True): + if host_string != new_ctrl_host and\ + sudo('ls /var/lib/rabbitmq/.erlang.cookie').succeeded: + rabbitmq_cluster_uuid = \ + sudo('cat /var/lib/rabbitmq/.erlang.cookie') + break; + if rabbitmq_cluster_uuid is None: + raise RuntimeError("Not able to get the Erlang cookie from the cluster nodes") + + execute(listen_at_supervisor_support_port_node, new_ctrl_host) + execute(remove_mnesia_database_node, new_ctrl_host) + execute(verify_rabbit_node_hostname) + execute(allow_rabbitmq_port_node, new_ctrl_host) + execute(rabbitmq_env) + execute(config_rabbitmq) + execute('stop_rabbitmq_and_set_cookie_node', rabbitmq_cluster_uuid, new_ctrl_host) + execute('start_rabbitmq_node', new_ctrl_host) + # adding sleep to workaround rabbitmq bug 26370 prevent + # "rabbitmqctl cluster_status" from breaking the database, + # this is seen in ci + time.sleep(30) + if (role is 'openstack' and get_openstack_internal_vip() or + role is 'cfgm' and get_contrail_internal_vip()): + execute('set_ha_policy_in_rabbitmq') + execute('set_tcp_keepalive') + + result = execute(verify_cluster_status) + if False in result.values(): + print "Unable to setup RabbitMQ cluster in role[%s]...." 
% role + exit(1) + @task @roles('build') def setup_rabbitmq_cluster(force=False): @@ -299,7 +372,9 @@ def setup_rabbitmq_cluster(force=False): execute(config_rabbitmq) execute("stop_rabbitmq_and_set_cookie", rabbitmq_cluster_uuid) execute(start_rabbitmq) - #adding sleep to workaround rabbitmq bug 26370 prevent "rabbitmqctl cluster_status" from breaking the database, this is seen in ci + # adding sleep to workaround rabbitmq bug 26370 prevent + # "rabbitmqctl cluster_status" from breaking the database, + # this is seen in ci time.sleep(60) #execute(rabbitmqctl_stop_app) #execute(rabbitmqctl_reset)
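
A minimal usage sketch for the new tasks, run from the build node. It assumes
the new controller (root@10.1.1.5 is a hypothetical address) has already been
added to the relevant roledefs and hostnames in testbed.py:

    # Install the contrail packages on the new node as per its roles
    fab install_new_contrail:new_ctrl=root@10.1.1.5

    # Provision the new node and join it to the running cluster
    fab join_cluster:root@10.1.1.5

join_cluster runs setup_common_node followed by join_ha_cluster, which joins
the keepalived, galera and rabbitmq clusters and then provisions whichever
cfgm/control/collector/webui/database roles the node holds in testbed.py.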