diff --git a/contrail_provisioning/storage/setup.py b/contrail_provisioning/storage/setup.py
index 326213cb..32820c59 100644
--- a/contrail_provisioning/storage/setup.py
+++ b/contrail_provisioning/storage/setup.py
@@ -72,6 +72,7 @@ def parse_args(self, args_str):
         parser.add_argument("--storage-setup-mode", help = "Configuration mode")
         parser.add_argument("--disks-to-remove", help = "Disks to remove", nargs="+", type=str)
         parser.add_argument("--hosts-to-remove", help = "Hosts to remove", nargs="+", type=str)
+        parser.add_argument("--storage-replica-size", help = "Replica size")
 
         self._args = parser.parse_args(self.remaining_argv)
 
@@ -133,6 +134,7 @@ def enable_storage(self):
         if self._args.hosts_to_remove:
             storage_setup_args = storage_setup_args + " --hosts-to-remove %s" %(' '.join(self._args.hosts_to_remove))
 
+        storage_setup_args = storage_setup_args + " --storage-replica-size %s" %(self._args.storage_replica_size)
 
         #Setup storage if storage is defined in testbed.py
         with settings(host_string=self._args.storage_master, password=storage_master_passwd):
diff --git a/contrail_provisioning/storage/storagefs/ceph_utils.py b/contrail_provisioning/storage/storagefs/ceph_utils.py
index 1c1e5b09..be5b757a 100644
--- a/contrail_provisioning/storage/storagefs/ceph_utils.py
+++ b/contrail_provisioning/storage/storagefs/ceph_utils.py
@@ -1408,7 +1408,8 @@ def do_pool_config(self, input_crush, storage_hostnames,
     # Sets PG/PGP count.
     # Sets ruleset based on pool/chassis configuration
     def do_configure_pools(self, storage_hostnames, storage_disk_config,
-                            storage_ssd_disk_config, chassis_config):
+                            storage_ssd_disk_config, chassis_config,
+                            replica_size = None):
         global host_hdd_dict
         global host_ssd_dict
         global hdd_pool_count
@@ -1464,6 +1465,9 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
             if host_hdd_dict[('hostcount', '%s' %(pool_index))] <= 1:
                 self.exec_local('sudo ceph osd pool set volumes_hdd size %s'
                                 %(REPLICA_ONE))
+            elif replica_size != 'None':
+                self.exec_local('sudo ceph osd pool set volumes_hdd size %s'
+                                %(replica_size))
             else:
                 self.exec_local('sudo ceph osd pool set volumes_hdd size %s'
                                 %(REPLICA_DEFAULT))
@@ -1491,6 +1495,10 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
                 self.exec_local('sudo ceph osd pool set volumes_hdd_%s size %s'
                                 %(host_hdd_dict[('poolname','%s'
                                 %(pool_index))], REPLICA_ONE))
+            elif replica_size != 'None':
+                self.exec_local('sudo ceph osd pool set volumes_hdd_%s size %s'
+                                %(host_hdd_dict[('poolname','%s'
+                                %(pool_index))], replica_size))
             else:
                 self.exec_local('sudo ceph osd pool set volumes_hdd_%s size %s'
                                 %(host_hdd_dict[('poolname','%s'
@@ -1521,6 +1529,11 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
                                 %(REPLICA_ONE))
                 self.exec_local('sudo ceph osd pool set images size %s'
                                 %(REPLICA_ONE))
+            elif replica_size != 'None':
+                self.exec_local('sudo ceph osd pool set volumes size %s'
+                                %(replica_size))
+                self.exec_local('sudo ceph osd pool set images size %s'
+                                %(replica_size))
             else:
                 self.exec_local('sudo ceph osd pool set volumes size %s'
                                 %(REPLICA_DEFAULT))
@@ -1560,6 +1573,9 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
             if host_ssd_dict[('hostcount', '%s' %(pool_index))] <= 1:
                 self.exec_local('sudo ceph osd pool set volumes_ssd size %s'
                                 %(REPLICA_ONE))
+            elif replica_size != 'None':
+                self.exec_local('sudo ceph osd pool set volumes_ssd size %s'
+                                %(replica_size))
             else:
                 self.exec_local('sudo ceph osd pool set volumes_ssd size %s'
                                 %(REPLICA_DEFAULT))
@@ -1587,6 +1603,10 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
                 self.exec_local('sudo ceph osd pool set volumes_ssd_%s size %s'
                                 %(host_ssd_dict[('poolname','%s'
                                 %(pool_index))], REPLICA_DEFAULT))
+            elif replica_size != 'None':
+                self.exec_local('sudo ceph osd pool set volumes_ssd_%s size %s'
+                                %(host_ssd_dict[('poolname','%s'
+                                %(pool_index))], replica_size))
             else:
                 self.exec_local('sudo ceph osd pool set volumes_ssd_%s size %s'
                                 %(host_ssd_dict[('poolname','%s'
@@ -1625,6 +1645,11 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
                                 %(REPLICA_TWO))
                 self.exec_local('sudo ceph osd pool set images size %s'
                                 %(REPLICA_TWO))
+            elif replica_size != 'None':
+                self.exec_local('sudo ceph osd pool set volumes size %s'
+                                %(replica_size))
+                self.exec_local('sudo ceph osd pool set images size %s'
+                                %(replica_size))
             else:
                 rep_size = self.exec_local('sudo ceph osd pool get volumes size | \
                                 awk \'{print $2}\'')
diff --git a/contrail_provisioning/storage/storagefs/livemnfs_setup.py b/contrail_provisioning/storage/storagefs/livemnfs_setup.py
index 97557931..c3afbdad 100755
--- a/contrail_provisioning/storage/storagefs/livemnfs_setup.py
+++ b/contrail_provisioning/storage/storagefs/livemnfs_setup.py
@@ -27,6 +27,24 @@ class SetupNFSLivem(object):
     TMP_FSTAB='/tmp/fstab'
     global NOVA_INST_GLOBAL
     NOVA_INST_GLOBAL='/var/lib/nova/instances/global'
+    global MAX_RETRY_WAIT
+    MAX_RETRY_WAIT = 10
+
+    def check_vm(self, vmip):
+        retry = 0
+        time.sleep(10)
+        while True:
+            vmnavail=local('ping -c 5 %s | grep \" 100%% packet loss\" |wc -l' %(vmip) , capture=True, shell='/bin/bash')
+            if vmnavail == '0':
+                break
+            retry += 1
+            if retry > MAX_RETRY_WAIT:
+                vm_running=local('source /etc/contrail/openstackrc && nova list | grep -w " livemnfs " |grep ACTIVE |wc -l' , capture=True, shell='/bin/bash')
+                if vm_running != '0':
+                    local('source /etc/contrail/openstackrc && nova reboot --hard livemnfs')
+            print 'Waiting for VM to come up'
+            time.sleep(10)
+    #end check_vm
 
     def __init__(self, args_str = None):
         print sys.argv[1:]
@@ -104,7 +122,7 @@ def __init__(self, args_str = None):
                 local('source /etc/contrail/openstackrc && nova boot --image livemnfs --flavor 100 --availability-zone nova:%s --nic net-id=%s livemnfs --meta storage_scope=local' %(nfs_livem_host, net_id), shell='/bin/bash')
             else:
                 local('source /etc/contrail/openstackrc && nova start livemnfs', shell='/bin/bash')
-            wait_loop = 10
+            wait_loop = 100
             while True:
                 vm_running=local('source /etc/contrail/openstackrc && nova list | grep livemnfs |grep ACTIVE |wc -l' , capture=True, shell='/bin/bash')
                 if vm_running == '1':
@@ -135,9 +153,12 @@ def __init__(self, args_str = None):
             # The vmip is the actual ip assigned to the VM.
            # Use this for the rest of the configurations
             vmip = local('source /etc/contrail/openstackrc && nova show livemnfs |grep \"livemnfs network\"|awk \'{print $5}\'', capture=True, shell='/bin/bash')
+            gwnetaddr = ''
             for hostname, entries, entry_token in zip(self._args.storage_hostnames, self._args.storage_hosts, self._args.storage_host_tokens):
                 if hostname == vmhost:
                     with settings(host_string = 'root@%s' %(entries), password = entry_token):
+                        gwaddr = run('ip addr show |grep -w %s | awk \'{print $2}\'' %(entries))
+                        gwnetaddr = netaddr.IPNetwork('%s' %(gwaddr)).cidr
                         #Set autostart vm after node reboot
                         run('openstack-config --set /etc/nova/nova.conf DEFAULT resume_guests_state_on_host_boot True')
                         #check for vgw interface
@@ -230,12 +251,26 @@ def __init__(self, args_str = None):
                                 '/tmp/interfaces' %(vmip),
                                 shell='/bin/bash')
                             run('cp /tmp/interfaces /etc/network/interfaces');
+                        cur_gw = gwentry
+                        # Check if the system is in the same network as the
+                        # host running the livemnfs vm. If not, use the real
+                        # Gateway as gw for the VM ip instead of using the
+                        # compute node.
+                        if gwnetaddr != '':
+                            diff_net = run('ip route show | grep -w %s | \
+                                           grep via | wc -l' %(gwnetaddr))
+                            if diff_net == '0':
+                                cur_gw = gwentry
+                            else:
+                                cur_gw = run('ip route show | grep -w %s | \
+                                             grep via | awk \'{print $3}\''
+                                             %(gwnetaddr))
                         #check for dynamic route on the vm host
                         dynroutedone=run('netstat -rn |grep %s|wc -l' %(vmip), shell='/bin/bash')
                         if dynroutedone == '0':
                             dynroutedone=run('route add %s gw %s'
-                                             %(vmip, gwentry),
+                                             %(vmip, cur_gw),
                                              shell='/bin/bash')
                         #check and add static route on master
                         staroutedone=run('cat /etc/network/interfaces '
@@ -246,13 +281,13 @@ def __init__(self, args_str = None):
                                 '/etc/network/interfaces');
                             run('echo \"up route add %s gw %s\" >> '
                                 '/etc/network/interfaces'
-                                %(vmip, gwentry));
+                                %(vmip, cur_gw));
             # Add route to the local master node.
             dynroutedone=local('netstat -rn |grep %s|wc -l' %(vmip),
                                shell='/bin/bash', capture=True)
             if dynroutedone == '0':
-                local('route add %s gw %s' %(vmip, gwentry),
+                local('route add %s gw %s' %(vmip, cur_gw),
                       shell='/bin/bash')
             #check and add static route on master
             staroutedone=local('cat /etc/network/interfaces '
@@ -264,7 +299,7 @@ def __init__(self, args_str = None):
                       '/etc/network/interfaces');
                 local('echo \"up route add %s gw %s\" >> '
                       '/etc/network/interfaces'
-                      %(vmip, gwentry));
+                      %(vmip, cur_gw));
             #cinder volume creation and attaching to VM
             avail=local('rados df | grep avail | awk \'{ print $3 }\'', capture = True, shell='/bin/bash')
@@ -272,8 +307,9 @@ def __init__(self, args_str = None):
             # TODO need to check if this needs to be configurable
             avail_gb = int(avail)/1024/1024/2/3
             print avail_gb
-            # update quota if available is > 1T
-            quota_gb = (avail_gb * 3)
+            # update quota based on Total size
+            total=local('rados df | grep "total space" | awk \'{ print $3 }\'', capture = True, shell='/bin/bash')
+            quota_gb = int(total)/1024/1024/2
             admintenantid=local('source /etc/contrail/openstackrc && keystone tenant-list |grep " admin" | awk \'{print $2}\'' , capture=True, shell='/bin/bash')
             local('source /etc/contrail/openstackrc && cinder quota-update --gigabytes=%d %s' %(quota_gb, admintenantid), capture=True, shell='/bin/bash')
@@ -302,12 +338,7 @@ def __init__(self, args_str = None):
             if volvmattached == '0':
                 return
 
-            while True:
-                vmnavail=local('ping -c 5 %s | grep \" 100%% packet loss\" |wc -l' %(vmip) , capture=True, shell='/bin/bash')
-                if vmnavail == '0':
-                    break
-                print 'Waiting for VM to come up'
-                time.sleep(10)
+            self.check_vm(vmip)
             with settings(host_string = 'livemnfs@%s' %(vmip), password = 'livemnfs'):
                 mounted=run('sudo cat /proc/mounts|grep livemnfs|wc -l')
                 if mounted == '0':
@@ -316,15 +347,7 @@ def __init__(self, args_str = None):
                     if vdbavail == '0':
                         print 'Disk not available yet. Need to reboot VM'
                         vdbavail=run('sudo reboot')
-                        time.sleep(10)
-                        while True:
-                            print 'Waiting for VM to come up'
-                            time.sleep(10)
-                            vmnavail=local('ping -c 5 %s | grep \" 100%% packet loss\" |wc -l' %(vmip),
-                                           capture=True, shell='/bin/bash')
-                            if vmnavail == '0':
-                                time.sleep(10)
-                                break
+                        self.check_vm(vmip)
                     else:
                         break
                 vdbavail=run('sudo parted /dev/vdb print |grep ext4|wc -l')
@@ -387,14 +410,7 @@ def __init__(self, args_str = None):
                 run('sudo service nfs-kernel-server restart > /tmp/nfssrv.out', shell='/bin/bash')
                 time.sleep(2)
                 vdbavail=run('sudo reboot')
-                time.sleep(10)
-                while True:
-                    print 'Waiting for VM to come up'
-                    time.sleep(10)
-                    vmnavail=local('ping -c 5 %s | grep \" 100%% packet loss\" |wc -l' %(vmip) , capture=True, shell='/bin/bash')
-                    if vmnavail == '0':
-                        time.sleep(10)
-                        break
+                self.check_vm(vmip)
 
             for hostname, entries, entry_token in zip(self._args.storage_hostnames, self._args.storage_hosts, self._args.storage_host_tokens):
                 with settings(host_string = 'root@%s' %(entries), password = entry_token):
@@ -567,6 +583,7 @@ def __init__(self, args_str = None):
                 print 'Cannot find vm ip. Cannot continue unconfigure'
                 return
 
+            gwnetaddr = ''
             for hostname, entries, entry_token in zip(self._args.storage_hostnames, self._args.storage_hosts, self._args.storage_host_tokens):
                 if hostname == vmhost:
                     with settings(host_string = 'root@%s' %(entries), password = entry_token):
@@ -590,6 +607,8 @@ def __init__(self, args_str = None):
                                 break
                             gateway_id = gateway_id + 1
 
+                        gwaddr = run('ip addr show |grep -w %s | awk \'{print $2}\'' %(entries))
+                        gwnetaddr = netaddr.IPNetwork('%s' %(gwaddr)).cidr
                         #check for dynamic route on the vm host
                         dynroutedone=run('netstat -rn |grep %s|wc -l' %(vmip), shell='/bin/bash')
                         if dynroutedone == '1':
@@ -612,12 +631,22 @@ def __init__(self, args_str = None):
                                      self._args.storage_host_tokens):
                             if gwhostname == vmhost:
                                 gwentry = gwentries
+                        cur_gw = gwentry
+                        if gwnetaddr != '':
+                            diff_net = run('ip route show | grep -w %s | \
+                                           grep via | wc -l' %(gwnetaddr))
+                            if diff_net == '0':
+                                cur_gw = gwentry
+                            else:
+                                cur_gw = run('ip route show | grep -w %s | \
+                                             grep via | awk \'{print $3}\''
+                                             %(gwnetaddr))
                         #check for dynamic route on the vm host
                         dynroutedone=run('netstat -rn |grep %s|wc -l' %(vmip), shell='/bin/bash')
                         if dynroutedone == '1':
                             dynroutedone=run('route del %s gw %s'
-                                             %(vmip, gwentry),
+                                             %(vmip, cur_gw),
                                              shell='/bin/bash')
                         #check and delete static route
                         staroutedone=run('cat /etc/network/interfaces '
@@ -635,7 +664,7 @@ def __init__(self, args_str = None):
                                capture=True)
             if dynroutedone == '1':
                 dynroutedone=local('route del %s gw %s'
-                                   %(vmip, gwentry),
+                                   %(vmip, cur_gw),
                                    shell='/bin/bash')
             #check and delete static route
             staroutedone=local('cat /etc/network/interfaces '
diff --git a/contrail_provisioning/storage/storagefs/setup.py b/contrail_provisioning/storage/storagefs/setup.py
index 1605dc08..ce0e4e96 100755
--- a/contrail_provisioning/storage/storagefs/setup.py
+++ b/contrail_provisioning/storage/storagefs/setup.py
@@ -602,7 +602,8 @@ def do_crush_map_pool_config(self):
                             self._args.storage_hostnames,
                             self._args.storage_disk_config,
                             self._args.storage_ssd_disk_config,
-                            self._args.storage_chassis_config)
+                            self._args.storage_chassis_config,
+                            self._args.storage_replica_size)
     #end do_crush_map_pool_config()
 
     # Function for NFS cinder configuration
@@ -739,31 +740,59 @@ def do_create_monlist(self):
         global ceph_mon_count
         global ceph_all_hosts
 
+        # Find existing mons
+        ceph_mon_entries = local('ceph mon stat 2>&1 |grep quorum | \
+                                awk \'{print $11}\'', capture=True)
+        if ceph_mon_entries != '':
+            ceph_mon_list = ceph_mon_entries.split(',')
+            for entry in ceph_mon_list:
+                ceph_mon_count += 1;
+                ceph_mon_hosts_list.append(entry)
+
         # first create master monitor list
-        for entries, entry_token, hostname in zip(self._args.storage_hosts, self._args.storage_host_tokens, self._args.storage_hostnames):
+        for entries, entry_token, hostname in zip(self._args.storage_hosts,
+                                                   self._args.storage_host_tokens,
+                                                   self._args.storage_hostnames):
             if entries == self._args.storage_master:
                 ceph_mon_hosts = ceph_mon_hosts + hostname + ' '
-                ceph_mon_count += 1;
-                ceph_mon_hosts_list.append(hostname)
+                entry = ''
+                for entry in ceph_mon_hosts_list:
+                    if entry == hostname:
+                        break
+                if entry != hostname:
+                    ceph_mon_count += 1;
+                    ceph_mon_hosts_list.append(hostname)
             if self._args.storage_os_hosts[0] != 'none':
                 for osnode in self._args.storage_os_hosts:
                     if entries == osnode:
                         ceph_mon_hosts = ceph_mon_hosts + hostname + ' '
-                        ceph_mon_count += 1;
-                        ceph_mon_hosts_list.append(hostname)
+                        entry = ''
+                        for entry in ceph_mon_hosts_list:
+                            if entry == hostname:
+                                break
+                        if entry != hostname:
+                            ceph_mon_count += 1;
+                            ceph_mon_hosts_list.append(hostname)
 
         # first try to use configured compute monitor list
         # if configured monitor list is empty then start
        # monitors on first "N" computes
-        # where master monitor list + "N" compute monitors <= MAX_MONS
+        # where master monitor list + "N" compute monitors < MAX_MONS
         if self._args.storage_mon_hosts[0] != 'none':
            for entries in self._args.storage_mon_hosts:
-                ceph_mon_count += 1;
-                if ceph_mon_count <= MAX_MONS:
+                if ceph_mon_count < MAX_MONS:
                    ceph_mon_hosts = ceph_mon_hosts + entries + ' '
-                    ceph_mon_hosts_list.append(entries)
+                    entry = ''
+                    for entry in ceph_mon_hosts_list:
+                        if entry == hostname:
+                            break
+                    if entry != hostname:
+                        ceph_mon_count += 1;
+                        ceph_mon_hosts_list.append(entries)
         else:
-            for entries, entry_token, hostname in zip(self._args.storage_hosts, self._args.storage_host_tokens, self._args.storage_hostnames):
+            for entries, entry_token, hostname in zip(self._args.storage_hosts,
+                                                       self._args.storage_host_tokens,
+                                                       self._args.storage_hostnames):
                 if entries == self._args.storage_master:
                     continue
                 entry_hit = 0
@@ -773,13 +802,19 @@ def do_create_monlist(self):
                         entry_hit = 1
                         break
                 if entry_hit == 0:
-                    ceph_mon_count += 1;
-                    if ceph_mon_count <= MAX_MONS:
+                    if ceph_mon_count < MAX_MONS:
                         ceph_mon_hosts = ceph_mon_hosts + hostname + ' '
-                        ceph_mon_hosts_list.append(hostname)
+                        entry = ''
+                        for entry in ceph_mon_hosts_list:
+                            if entry == hostname:
+                                break
+                        if entry != hostname:
+                            ceph_mon_count += 1;
+                            ceph_mon_hosts_list.append(hostname)
 
         for entries in self._args.storage_hostnames:
             ceph_all_hosts = ceph_all_hosts + entries + ' '
+    # end do_create_monlist
@@ -1445,6 +1480,16 @@ def do_tune_ceph(self):
         local('sudo openstack-config --set %s osd "osd disk threads" %s'
                             %(CEPH_CONFIG_FILE, CEPH_DISK_THREADS))
 
+        # change default heartbeat based on Replica size
+        if self._args.storage_replica_size != 'None':
+            heartbeat_timeout = int(self._args.storage_replica_size) * 60
+        else:
+            heartbeat_timeout = 120
+        local('ceph tell osd.* injectargs -- --osd_heartbeat_grace=%s'
+                            %(heartbeat_timeout))
+        local('sudo openstack-config --set %s osd "osd heartbeat grace" %s'
+                            %(CEPH_CONFIG_FILE, heartbeat_timeout))
+
         # compute ceph.conf configuration done here
         for entries, entry_token in zip(self._args.storage_hosts,
                                         self._args.storage_host_tokens):
@@ -1459,6 +1504,8 @@ def do_tune_ceph(self):
                                 %(CEPH_CONFIG_FILE, CEPH_OP_THREADS))
                 run('sudo openstack-config --set %s osd "osd disk threads" %s'
                                 %(CEPH_CONFIG_FILE, CEPH_DISK_THREADS))
+                run('sudo openstack-config --set %s osd "osd heartbeat grace" %s'
+                                %(CEPH_CONFIG_FILE, heartbeat_timeout))
 
         return
     #end do_tune_ceph()
@@ -2215,7 +2262,8 @@ def do_configure_glance_rbd(self):
         #Glance configuration on the storage master
         local('sudo openstack-config --set %s DEFAULT default_store rbd'
                             %(GLANCE_API_CONF))
-        local('sudo openstack-config --set %s DEFAULT known_stores glance.store.rbd.Store'
+        local('sudo openstack-config --set %s DEFAULT known_stores \
+                glance.store.rbd.Store,glance.store.filesystem.Store,glance.store.http.Store'
                             %(GLANCE_API_CONF))
         local('sudo openstack-config --set %s DEFAULT show_image_direct_url True'
                             %(GLANCE_API_CONF))
@@ -2239,8 +2287,8 @@ def do_configure_glance_rbd(self):
                 run('sudo openstack-config --set %s DEFAULT \
                         default_store rbd'
                         %(GLANCE_API_CONF))
-                run('sudo openstack-config --set %s DEFAULT \
-                        known_stores glance.store.rbd.Store'
+                run('sudo openstack-config --set %s DEFAULT known_stores \
+                        glance.store.rbd.Store,glance.store.filesystem.Store,glance.store.http.Store'
                         %(GLANCE_API_CONF))
                 run('sudo openstack-config --set %s DEFAULT \
                         show_image_direct_url True'
                         %(GLANCE_API_CONF))
@@ -2624,7 +2672,12 @@ def do_remove_osd(self):
                                 shell='/bin/bash')
                 if osd_det != '':
                     osd_num = osd_det.split('-')[1]
-                    run('sudo stop ceph-osd id=%s' %(osd_num))
+                    osd_running = run('ps -ef | grep ceph-osd | \
+                                       grep -v grep | \
+                                       grep -w "\\-i %s" | wc -l'
+                                       %(osd_num))
+                    if osd_running != '0':
+                        run('sudo stop ceph-osd id=%s' %(osd_num))
                     run('sudo ceph -k %s osd out %s'
                                 %(CEPH_ADMIN_KEYRING, osd_num))
                     run('sudo ceph osd crush remove osd.%s'
@@ -2878,6 +2931,16 @@ def do_storage_setup(self):
         return
     #end do_storage_setup()
 
+    # Cleanup disk config
+    def do_cleanup_config(self):
+        if self._args.storage_directory_config[0] == 'none' and \
+                self._args.storage_disk_config[0] == 'none' and \
+                self._args.storage_ssd_disk_config[0] != 'none':
+            self._args.storage_disk_config = self._args.storage_ssd_disk_config
+            self._args.storage_ssd_disk_config = ['none']
+    #end do_cleanup_config()
+
+
     # Main function for storage related configurations
     # Note: All the functions are idempotent. Any additions/modifications
     # should ensure that the behavior stays the same.
@@ -2893,6 +2956,9 @@ def __init__(self, args_str = None):
         # Do the ssh key configuration
         self.do_ssh_config()
 
+        # Cleanup configuration
+        self.do_cleanup_config()
+
         # Create monitor list
         self.do_create_monlist()
         # Following all are specific setups based on the setup_mode
@@ -3009,6 +3075,7 @@ def _parse_args(self, args_str):
         parser.add_argument("--storage-setup-mode", help = "Storage configuration mode")
         parser.add_argument("--disks-to-remove", help = "Disks to remove", nargs="+", type=str)
         parser.add_argument("--hosts-to-remove", help = "Hosts to remove", nargs="+", type=str)
+        parser.add_argument("--storage-replica-size", help = "Replica size")
 
         self._args = parser.parse_args(remaining_argv)
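
Note (illustration only, not part of the patch): a minimal, self-contained sketch of the
replica-size selection that do_configure_pools() applies above. The helper name
pool_size_command() and the REPLICA_* values are assumptions for illustration; the patch
passes --storage-replica-size through as a string, so an unset flag arrives downstream as
the literal string 'None'.

    # Hypothetical sketch of how --storage-replica-size maps to the
    # "ceph osd pool set <pool> size <n>" command issued by the patch.
    REPLICA_ONE = 1        # assumed value
    REPLICA_DEFAULT = 2    # assumed value

    def pool_size_command(pool, host_count, replica_size):
        # Single-host pools are forced to replica size 1, an explicit
        # --storage-replica-size wins next, otherwise fall back to the default.
        if host_count <= 1:
            size = REPLICA_ONE
        elif replica_size != 'None':
            size = replica_size
        else:
            size = REPLICA_DEFAULT
        return 'sudo ceph osd pool set %s size %s' % (pool, size)

    if __name__ == '__main__':
        # e.g. three storage hosts provisioned with --storage-replica-size 3
        print(pool_size_command('volumes_hdd', host_count=3, replica_size='3'))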