Storage provision fix merge
Closes-Bug: #1446391
Closes-Bug: #1447707
Closes-Bug: #1388449
Closes-Bug: #1446396
Closes-Bug: #1454898
Closes-Bug: #1457704
Closes-Bug: #1459835
Closes-Bug: #1460730
Issues:
    OSDs flap because of an insufficient heartbeat timeout on
        large clusters.
    The configured replica size is overwritten when upgrade or
        setup_storage is run again.
    Live migration provisioning doesn't work if there are multiple subnets.
    Upgrade or setup_storage creates new mons when the storage-compute
        order changes in testbed.py.
    If only ssd-disks are specified, the PGs are stuck.
    When an image is added with the http client, glance add fails.
    If the OSD is not running, disk removal fails because it tries
        to stop the OSD.
Fix:
    Configured the heartbeat based on the replica size.
    Added a configuration variable 'storage_replica_size' in testbed.py
        to specify the replica size (see the sketch after this list).
    Added a fix to support multiple subnets for live migration.
    The current monitors were not taken into account in the total
        monitor count. Fix added to take existing monitors into account.
    If only 'ssd-disks' are present, code added to treat them as 'disks'.
    The known store configuration was set to use only rbd, which forced
        even the glance client to use only rbd and blocked http access.
    The cinder quota is now set based on the total space and
        not the currently available space.
    Added a check to stop the OSD only if it is running.
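
A minimal sketch of the replica-size precedence that the ceph_utils.py change
below applies to every pool, assuming placeholder values for the REPLICA_*
constants and a hypothetical set_pool_size() helper (exec_local and the ceph
command are the ones used in the diff):

    # Condensed, illustrative view of the repeated if/elif/else blocks in
    # do_configure_pools(); not the actual commit code.
    REPLICA_ONE = 1        # placeholder; the real constants live in ceph_utils.py
    REPLICA_DEFAULT = 2    # placeholder

    def set_pool_size(exec_local, pool, host_count, replica_size):
        if host_count <= 1:
            size = REPLICA_ONE       # a single host cannot hold replicas
        elif replica_size != 'None':
            size = replica_size      # value supplied via --storage-replica-size
        else:
            size = REPLICA_DEFAULT   # fall back to the built-in default
        exec_local('sudo ceph osd pool set %s size %s' % (pool, size))

With testbed.py carrying storage_replica_size and setup.py forwarding it as
--storage-replica-size, re-running setup_storage or an upgrade re-applies the
configured size instead of resetting the pools to the default.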

Change-Id: I96a9a070eea1e0461c71566a3889a76f59828ef3
Jeya ganesh babu J committed Jun 2, 2015
1 parent a133e9e commit cadb798
Showing 4 changed files with 174 additions and 51 deletions.
2 changes: 2 additions & 0 deletions contrail_provisioning/storage/setup.py
@@ -72,6 +72,7 @@ def parse_args(self, args_str):
parser.add_argument("--storage-setup-mode", help = "Configuration mode")
parser.add_argument("--disks-to-remove", help = "Disks to remove", nargs="+", type=str)
parser.add_argument("--hosts-to-remove", help = "Hosts to remove", nargs="+", type=str)
parser.add_argument("--storage-replica-size", help = "Replica size")


self._args = parser.parse_args(self.remaining_argv)
@@ -133,6 +134,7 @@ def enable_storage(self):

if self._args.hosts_to_remove:
storage_setup_args = storage_setup_args + " --hosts-to-remove %s" %(' '.join(self._args.hosts_to_remove))
storage_setup_args = storage_setup_args + " --storage-replica-size %s" %(self._args.storage_replica_size)

#Setup storage if storage is defined in testbed.py
with settings(host_string=self._args.storage_master, password=storage_master_passwd):
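Note on the plumbing above: --storage-replica-size is appended to
storage_setup_args unconditionally, so when no replica size is configured the
%s interpolation turns Python's None into the literal string 'None' by the time
it reaches the downstream scripts. That appears to be why the pool-sizing code
in ceph_utils.py compares replica_size against the string 'None' rather than
the None object. A small illustrative snippet (the argument name matches the
diff; the snippet itself is not part of the commit):

    # Illustrative only: why ceph_utils.py checks replica_size != 'None'.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--storage-replica-size", help="Replica size")
    args = parser.parse_args([])    # flag not supplied, so the attribute is None
    flag = " --storage-replica-size %s" % (args.storage_replica_size)
    print flag                      # -> ' --storage-replica-size None'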
27 changes: 26 additions & 1 deletion contrail_provisioning/storage/storagefs/ceph_utils.py
@@ -1408,7 +1408,8 @@ def do_pool_config(self, input_crush, storage_hostnames,
# Sets PG/PGP count.
# Sets ruleset based on pool/chassis configuration
def do_configure_pools(self, storage_hostnames, storage_disk_config,
storage_ssd_disk_config, chassis_config):
storage_ssd_disk_config, chassis_config,
replica_size = None):
global host_hdd_dict
global host_ssd_dict
global hdd_pool_count
@@ -1464,6 +1465,9 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
if host_hdd_dict[('hostcount', '%s' %(pool_index))] <= 1:
self.exec_local('sudo ceph osd pool set volumes_hdd size %s'
%(REPLICA_ONE))
elif replica_size != 'None':
self.exec_local('sudo ceph osd pool set volumes_hdd size %s'
%(replica_size))
else:
self.exec_local('sudo ceph osd pool set volumes_hdd size %s'
%(REPLICA_DEFAULT))
@@ -1491,6 +1495,10 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
self.exec_local('sudo ceph osd pool set volumes_hdd_%s size %s'
%(host_hdd_dict[('poolname','%s'
%(pool_index))], REPLICA_ONE))
elif replica_size != 'None':
self.exec_local('sudo ceph osd pool set volumes_hdd_%s size %s'
%(host_hdd_dict[('poolname','%s'
%(pool_index))], replica_size))
else:
self.exec_local('sudo ceph osd pool set volumes_hdd_%s size %s'
%(host_hdd_dict[('poolname','%s'
@@ -1521,6 +1529,11 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
%(REPLICA_ONE))
self.exec_local('sudo ceph osd pool set images size %s'
%(REPLICA_ONE))
elif replica_size != 'None':
self.exec_local('sudo ceph osd pool set volumes size %s'
%(replica_size))
self.exec_local('sudo ceph osd pool set images size %s'
%(replica_size))
else:
self.exec_local('sudo ceph osd pool set volumes size %s'
%(REPLICA_DEFAULT))
@@ -1560,6 +1573,9 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
if host_ssd_dict[('hostcount', '%s' %(pool_index))] <= 1:
self.exec_local('sudo ceph osd pool set volumes_ssd size %s'
%(REPLICA_ONE))
elif replica_size != 'None':
self.exec_local('sudo ceph osd pool set volumes_ssd size %s'
%(replica_size))
else:
self.exec_local('sudo ceph osd pool set volumes_ssd size %s'
%(REPLICA_DEFAULT))
@@ -1587,6 +1603,10 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
self.exec_local('sudo ceph osd pool set volumes_ssd_%s size %s'
%(host_ssd_dict[('poolname','%s'
%(pool_index))], REPLICA_DEFAULT))
elif replica_size != 'None':
self.exec_local('sudo ceph osd pool set volumes_ssd_%s size %s'
%(host_ssd_dict[('poolname','%s'
%(pool_index))], replica_size))
else:
self.exec_local('sudo ceph osd pool set volumes_ssd_%s size %s'
%(host_ssd_dict[('poolname','%s'
@@ -1625,6 +1645,11 @@ def do_configure_pools(self, storage_hostnames, storage_disk_config,
%(REPLICA_TWO))
self.exec_local('sudo ceph osd pool set images size %s'
%(REPLICA_TWO))
elif replica_size != 'None':
self.exec_local('sudo ceph osd pool set volumes size %s'
%(replica_size))
self.exec_local('sudo ceph osd pool set images size %s'
%(replica_size))
else:
rep_size = self.exec_local('sudo ceph osd pool get volumes size | \
awk \'{print $2}\'')
93 changes: 61 additions & 32 deletions contrail_provisioning/storage/storagefs/livemnfs_setup.py
@@ -27,6 +27,24 @@ class SetupNFSLivem(object):
TMP_FSTAB='/tmp/fstab'
global NOVA_INST_GLOBAL
NOVA_INST_GLOBAL='/var/lib/nova/instances/global'
global MAX_RETRY_WAIT
MAX_RETRY_WAIT = 10

def check_vm(self, vmip):
retry = 0
time.sleep(10)
while True:
vmnavail=local('ping -c 5 %s | grep \" 100%% packet loss\" |wc -l' %(vmip) , capture=True, shell='/bin/bash')
if vmnavail == '0':
break
retry += 1
if retry > MAX_RETRY_WAIT:
vm_running=local('source /etc/contrail/openstackrc && nova list | grep -w " livemnfs " |grep ACTIVE |wc -l' , capture=True, shell='/bin/bash')
if vm_running != '0':
local('source /etc/contrail/openstackrc && nova reboot --hard livemnfs')
print 'Waiting for VM to come up'
time.sleep(10)
#end check_vm

def __init__(self, args_str = None):
print sys.argv[1:]
@@ -104,7 +122,7 @@ def __init__(self, args_str = None):
local('source /etc/contrail/openstackrc && nova boot --image livemnfs --flavor 100 --availability-zone nova:%s --nic net-id=%s livemnfs --meta storage_scope=local' %(nfs_livem_host, net_id), shell='/bin/bash')
else:
local('source /etc/contrail/openstackrc && nova start livemnfs', shell='/bin/bash')
wait_loop = 10
wait_loop = 100
while True:
vm_running=local('source /etc/contrail/openstackrc && nova list | grep livemnfs |grep ACTIVE |wc -l' , capture=True, shell='/bin/bash')
if vm_running == '1':
@@ -135,9 +153,12 @@ def __init__(self, args_str = None):
# The vmip is the actual ip assigned to the VM. Use this for the rest of the configurations
vmip = local('source /etc/contrail/openstackrc && nova show livemnfs |grep \"livemnfs network\"|awk \'{print $5}\'', capture=True, shell='/bin/bash')

gwnetaddr = ''
for hostname, entries, entry_token in zip(self._args.storage_hostnames, self._args.storage_hosts, self._args.storage_host_tokens):
if hostname == vmhost:
with settings(host_string = 'root@%s' %(entries), password = entry_token):
gwaddr = run('ip addr show |grep -w %s | awk \'{print $2}\'' %(entries))
gwnetaddr = netaddr.IPNetwork('%s' %(gwaddr)).cidr
#Set autostart vm after node reboot
run('openstack-config --set /etc/nova/nova.conf DEFAULT resume_guests_state_on_host_boot True')
#check for vgw interface
@@ -230,12 +251,26 @@ def __init__(self, args_str = None):
'/tmp/interfaces'
%(vmip), shell='/bin/bash')
run('cp /tmp/interfaces /etc/network/interfaces');
cur_gw = gwentry
# Check if the system is in the same network as the
# host running the livemnfs vm. If not, use the real
# Gateway as gw for the VM ip instead of using the
# compute node.
if gwnetaddr != '':
diff_net = run('ip route show | grep -w %s | \
grep via | wc -l' %(gwnetaddr))
if diff_net == '0':
cur_gw = gwentry
else:
cur_gw = run('ip route show | grep -w %s | \
grep via | awk \'{print $3}\''
%(gwnetaddr))

#check for dynamic route on the vm host
dynroutedone=run('netstat -rn |grep %s|wc -l' %(vmip), shell='/bin/bash')
if dynroutedone == '0':
dynroutedone=run('route add %s gw %s'
%(vmip, gwentry),
%(vmip, cur_gw),
shell='/bin/bash')
#check and add static route on master
staroutedone=run('cat /etc/network/interfaces '
@@ -246,13 +281,13 @@ def __init__(self, args_str = None):
'/etc/network/interfaces');
run('echo \"up route add %s gw %s\" >> '
'/etc/network/interfaces'
%(vmip, gwentry));
%(vmip, cur_gw));
# Add route to the local master node.
dynroutedone=local('netstat -rn |grep %s|wc -l'
%(vmip), shell='/bin/bash',
capture=True)
if dynroutedone == '0':
local('route add %s gw %s' %(vmip, gwentry),
local('route add %s gw %s' %(vmip, cur_gw),
shell='/bin/bash')
#check and add static route on master
staroutedone=local('cat /etc/network/interfaces '
@@ -264,16 +299,17 @@ def __init__(self, args_str = None):
'/etc/network/interfaces');
local('echo \"up route add %s gw %s\" >> '
'/etc/network/interfaces'
%(vmip, gwentry));
%(vmip, cur_gw));

#cinder volume creation and attaching to VM
avail=local('rados df | grep avail | awk \'{ print $3 }\'', capture = True, shell='/bin/bash')
# use 30% of the available space for the instances for now.
# TODO need to check if this needs to be configurable
avail_gb = int(avail)/1024/1024/2/3
print avail_gb
# update quota if available is > 1T
quota_gb = (avail_gb * 3)
# update quota based on Total size
total=local('rados df | grep "total space" | awk \'{ print $3 }\'', capture = True, shell='/bin/bash')
quota_gb = int(total)/1024/1024/2
admintenantid=local('source /etc/contrail/openstackrc && keystone tenant-list |grep " admin" | awk \'{print $2}\'' , capture=True, shell='/bin/bash')
local('source /etc/contrail/openstackrc && cinder quota-update --gigabytes=%d %s' %(quota_gb, admintenantid), capture=True, shell='/bin/bash')

@@ -302,12 +338,7 @@ def __init__(self, args_str = None):
if volvmattached == '0':
return

while True:
vmnavail=local('ping -c 5 %s | grep \" 100%% packet loss\" |wc -l' %(vmip) , capture=True, shell='/bin/bash')
if vmnavail == '0':
break
print 'Waiting for VM to come up'
time.sleep(10)
self.check_vm(vmip)
with settings(host_string = 'livemnfs@%s' %(vmip), password = 'livemnfs'):
mounted=run('sudo cat /proc/mounts|grep livemnfs|wc -l')
if mounted == '0':
@@ -316,15 +347,7 @@ def __init__(self, args_str = None):
if vdbavail == '0':
print 'Disk not available yet. Need to reboot VM'
vdbavail=run('sudo reboot')
time.sleep(10)
while True:
print 'Waiting for VM to come up'
time.sleep(10)
vmnavail=local('ping -c 5 %s | grep \" 100%% packet loss\" |wc -l' %(vmip),
capture=True, shell='/bin/bash')
if vmnavail == '0':
time.sleep(10)
break
self.check_vm(vmip)
else:
break
vdbavail=run('sudo parted /dev/vdb print |grep ext4|wc -l')
@@ -387,14 +410,7 @@ def __init__(self, args_str = None):
run('sudo service nfs-kernel-server restart > /tmp/nfssrv.out', shell='/bin/bash')
time.sleep(2)
vdbavail=run('sudo reboot')
time.sleep(10)
while True:
print 'Waiting for VM to come up'
time.sleep(10)
vmnavail=local('ping -c 5 %s | grep \" 100%% packet loss\" |wc -l' %(vmip) , capture=True, shell='/bin/bash')
if vmnavail == '0':
time.sleep(10)
break
self.check_vm(vmip)

for hostname, entries, entry_token in zip(self._args.storage_hostnames, self._args.storage_hosts, self._args.storage_host_tokens):
with settings(host_string = 'root@%s' %(entries), password = entry_token):
@@ -567,6 +583,7 @@ def __init__(self, args_str = None):
print 'Cannot find vm ip. Cannot continue unconfigure'
return

gwnetaddr = ''
for hostname, entries, entry_token in zip(self._args.storage_hostnames, self._args.storage_hosts, self._args.storage_host_tokens):
if hostname == vmhost:
with settings(host_string = 'root@%s' %(entries), password = entry_token):
@@ -590,6 +607,8 @@ def __init__(self, args_str = None):
break
gateway_id = gateway_id + 1

gwaddr = run('ip addr show |grep -w %s | awk \'{print $2}\'' %(entries))
gwnetaddr = netaddr.IPNetwork('%s' %(gwaddr)).cidr
#check for dynamic route on the vm host
dynroutedone=run('netstat -rn |grep %s|wc -l' %(vmip), shell='/bin/bash')
if dynroutedone == '1':
@@ -612,12 +631,22 @@ def __init__(self, args_str = None):
self._args.storage_host_tokens):
if gwhostname == vmhost:
gwentry = gwentries
cur_gw = gwentry
if gwnetaddr != '':
diff_net = run('ip route show | grep -w %s | \
grep via | wc -l' %(gwnetaddr))
if diff_net == '0':
cur_gw = gwentry
else:
cur_gw = run('ip route show | grep -w %s | \
grep via | awk \'{print $3}\''
%(gwnetaddr))
#check for dynamic route on the vm host
dynroutedone=run('netstat -rn |grep %s|wc -l'
%(vmip), shell='/bin/bash')
if dynroutedone == '1':
dynroutedone=run('route del %s gw %s'
%(vmip, gwentry),
%(vmip, cur_gw),
shell='/bin/bash')
#check and delete static route
staroutedone=run('cat /etc/network/interfaces '
@@ -635,7 +664,7 @@ def __init__(self, args_str = None):
capture=True)
if dynroutedone == '1':
dynroutedone=local('route del %s gw %s'
%(vmip, gwentry),
%(vmip, cur_gw),
shell='/bin/bash')
#check and delete static route
staroutedone=local('cat /etc/network/interfaces '
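The multi-subnet live-migration fix in livemnfs_setup.py boils down to one
decision per node: if the node has a direct (non-"via") route to the network of
the host running the livemnfs VM, it keeps using that host's IP as the gateway
for the VM IP; otherwise it uses the router its routing table already points
at. A condensed sketch of that decision, assuming standalone helpers and
subprocess for the shell calls (netaddr and the shell pipelines are the ones
used in the diff):

    # Illustrative sketch of the gateway choice added for multi-subnet setups;
    # mirrors the logic in livemnfs_setup.py but is not the committed code.
    import subprocess
    import netaddr

    def sh(cmd):
        # run a shell pipeline and return its stripped stdout
        return subprocess.check_output(cmd, shell=True).strip()

    def host_network(host_ip):
        # run on the node hosting the livemnfs VM: CIDR of the network it sits on
        gwaddr = sh("ip addr show | grep -w %s | awk '{print $2}'" % host_ip)
        return netaddr.IPNetwork(gwaddr).cidr

    def pick_gateway(gw_net, vm_host_ip):
        # run on each storage node and on the master
        via = sh("ip route show | grep -w %s | grep via | wc -l" % gw_net)
        if via == '0':
            return vm_host_ip    # same subnet: route the VM IP via its host
        return sh("ip route show | grep -w %s | grep via | awk '{print $3}'" % gw_net)

The chosen address then replaces the previously hard-coded gwentry in the
route add/del commands and in the static 'up route add' entries written to
/etc/network/interfaces, on both the storage nodes and the master.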
