# batch_cl.py -- from a fork of pyspextool/pyspextool
# pyspextool batch reduction command-line driver
# usage:
#
# >> python batch_cl.py [base folder]
# >> python batch_cl.py [data folder] [cal folder] [proc folder] [qa folder]
# >> python batch_cl.py --reduce-only [base folder]
# >> python batch_cl.py --qa-only [base folder]
#
import argparse
import copy
import os
import pandas
import sys
from pyspextool.batch import batch
# Fallback names used when -l / -d are not supplied on the command line.
LOG_FILE_PREFIX_DEFAULT = "log"
DRIVER_FILE_DEFAULT = "driver.txt"

# The reported version follows the batch module instead of a hard-coded
# date string (previously '2024.02.25').
VERSION = batch.VERSION

AUTHORS = [
    "Adam Burgasser",
    "Jean Marroquin",
    "Evan Watson",
]
class runBatch():
    """Command-line front end for the pyspextool batch reduction.

    Instantiating the class parses ``sys.argv`` and runs, in order, the
    stages that the options allow: folder set-up, log generation, driver
    file generation, reduction, and QA page creation.  All of the work is
    done in ``__init__``; the instance carries no state afterwards.

    Raises
    ------
    ValueError
        If a required folder, driver file or csv log file cannot be found.

    NOTE(review): this block was recovered from a copy of the file whose
    indentation had been stripped.  The nesting of the log-writing loop,
    of the two "press return" prompts (placed here inside the branch that
    actually generates the file) and of the organize branches (placed here
    at method level) is a reconstruction -- confirm against the repository.
    """

    # (flag, argparse action, help text) for every boolean switch, listed in
    # the order they should appear in the --help output.
    _SWITCHES = (
        ('--test', 'store_true', 'set to test the pyspextool installation'),
        ('--organize', 'store_true', 'organize the legacy archive download data files'),
        ('--organize-legacy', 'store_true', 'organize the legacy archive download data files'),
        ('--organize-irsa', 'store_true', 'organize the IRSA archive download data files'),
        ('--log-only', 'store_true', 'set to just save a log file'),
        ('--driver-only', 'store_true', 'set to just generate driver file'),
        ('--rebuild-log', 'store_true', 'set to rebuild the log file'),
        ('--rebuild-driver', 'store_true', 'set to rebuild the driver file from the log'),
        ('--reduce-only', 'store_true', 'set to just reduce (no log or driver generation)'),
        ('--qa-only', 'store_true', 'set to just generate QA page'),
        ('--qaplot-off', 'store_false', 'set to turn OFF QA plots'),
        ('--overwrite', 'store_true', 'set to automatically overwrite files, including log and driver'),
        ('--no-pause', 'store_true', 'set to remove all pauses for user input'),
        ('--no-cals', 'store_true', 'set to skip calibration file creation'),
        ('--no-extract', 'store_true', 'set to skip spectral extraction'),
        ('--no-combine', 'store_true', 'set to skip spectral file combination'),
        ('--no-fluxtell', 'store_true', 'set to skip flux and telluric correction'),
        ('--no-stitch', 'store_true', 'set to skip order stitching'),
        ('--quiet', 'store_true', 'set to return minimal feedback'),
        ('--version', 'store_true', 'report version number of batch reduction code'),
    )

    # (command-line flag key, driver parameter) pairs: each --no-* flag
    # switches the matching reduction step off in the driver parameters.
    _STEP_SWITCHES = (
        ('no_cals', 'CALIBRATIONS'),
        ('no_extract', 'EXTRACT'),
        ('no_combine', 'COMBINE'),
        ('no_fluxtell', 'FLUXTELL'),
        ('no_stitch', 'STITCH'),
    )

    @staticmethod
    def _build_parser():
        """Return the argparse parser describing every command-line option."""
        parser = argparse.ArgumentParser(description='Runs the batch mode for pyspextool')
        parser.add_argument(
            'inputs', nargs='*',
            help=('main inputs, with the following options:'
                  '\n\t(1) no inputs assumes user is in currently in a base folder that contains a "data" folder with raw files, in which "cals", "proc", and "qa" folders exist or will be created;'
                  '\n\t(2) single input giving the full path to a base folder that contains the "data" folder and optionally "cals", "proc", and "qa" folders;'
                  '\n\t(3) two inputs giving the full path to the data folder and then full path to a base folder in which "cals", "proc", and "qa" folders exist or will be created; or'
                  '\n\t(4) three or four inputs giving the full paths to the "data", "cals", "proc", and optionally "qa" folders, in that order'))
        # No nargs=1 on -l / -d: with nargs=1 argparse stores a one-element
        # list, which never equals '' and cannot be passed to os.path.join.
        parser.add_argument('-l', metavar='log-file-prefix', default='',
                            required=False, help='set log file prefix (default is "log")')
        parser.add_argument('-d', metavar='driver-filename', default='',
                            required=False, help='set driver file name (default is "driver.txt")')
        for flag, action, text in runBatch._SWITCHES:
            # store_false switches default to True, store_true ones to False
            parser.add_argument(flag, action=action, default=(action == 'store_false'),
                                required=False, help=text)
        return parser

    def __init__(self):
        args = vars(self._build_parser().parse_args())
        folders = args['inputs']
        driver_file = args['d']
        log_file_prefix = args['l']
        base_folder = ''
        verbose = not args['quiet']

        # give version number (only)
        if verbose: print('\npyspextool batch reduction code version {}\n'.format(VERSION))
        if args['version']: return

        # test
        if args['test']:
            batch.test()
            return

        # if nothing passed, assume we are using the local folder as the base folder
        if len(folders) < 1:
            base_folder = './'
            folders = [batch.REDUCTION_FOLDERS[0]]

        # one input - assume to be base folder
        if len(folders) == 1:
            if base_folder == '': base_folder = folders[0]
            if not os.path.exists(base_folder):
                raise ValueError('Cannot find base folder {}'.format(base_folder))
            folders[0] = os.path.join(base_folder, batch.REDUCTION_FOLDERS[0])

        # organize legacy data?
        if args['organize'] or args['organize_legacy']:
            batch.organizeLegacy(base_folder, verbose=verbose, overwrite=args['overwrite'], makecopy=True)
            if verbose: print('\n\nFinished IRTF Legacy archive file organization\n\n')
            return

        # organize irsa data?
        if args['organize_irsa']:
            batch.organizeIRSA(base_folder, verbose=verbose, overwrite=args['overwrite'], makecopy=True)
            if verbose: print('\n\nFinished IRTF IRSA archive file organization\n\n')
            return

        # two inputs - assume to be data folder and base folder
        if len(folders) == 2:
            if not os.path.exists(folders[0]):
                raise ValueError('Cannot find data folder {}'.format(folders[0]))
            if not os.path.exists(folders[1]):
                raise ValueError('Cannot find base folder {}'.format(folders[1]))
            if base_folder == '': base_folder = folders[1]
            folders = [folders[0]]

        # set or create data, cals, proc, and qa folders; any folder not given
        # explicitly is placed under the base folder
        for i, nm in enumerate(batch.REDUCTION_FOLDERS):
            if len(folders) <= i: folders.append(os.path.join(base_folder, nm+'/'))
            if not os.path.exists(folders[i]):
                os.mkdir(folders[i])
                if verbose: print('\nCreated {} folder {}'.format(nm, folders[i]))

        # check folders
        for i, f in enumerate(folders):
            if f == '':
                raise ValueError('Empty path name for {} folder'.format(batch.REDUCTION_FOLDERS[i]))
            if not os.path.exists(f):
                raise ValueError('Cannot find {} folder {}'.format(batch.REDUCTION_FOLDERS[i], f))

        # generate log csv and html files and put in qa folder (folders[3])
        if log_file_prefix == '': log_file_prefix = LOG_FILE_PREFIX_DEFAULT
        log_file_prefix = os.path.join(folders[3], log_file_prefix)
        csv_log = log_file_prefix+'.csv'
        html_log = log_file_prefix+'.html'
        keep_existing_log = not args['overwrite'] and not args['rebuild_log']
        if os.path.exists(html_log) and os.path.exists(csv_log) and keep_existing_log:
            print('\nWARNING: html log file {} and csv log file {} already exists; use --overwrite if you want to overwrite or --rebuild-log to rebuild'.format(html_log, csv_log))
        elif not (args['driver_only'] or args['reduce_only'] or args['qa_only']):
            dp = batch.processFolder(folders[0])
            for x in ['.csv', '.html']:
                if os.path.exists(log_file_prefix+x) and keep_existing_log:
                    print('\nWARNING: {} log file {} already exists so not saving; use --overwrite to overwrite'.format(x, log_file_prefix+x))
                else:
                    batch.writeLog(dp, log_file_prefix+x)
            # query to pause and check log
            if not args['no_pause'] and not args['log_only']:
                input('\n\nCheck the LOG FILES {} and {} and press return when you are ready to proceed, or type CNTL-C to abort...\n\n'.format(csv_log, html_log))
        if args['log_only']:
            print('\n\nLog files {} and {} created.'.format(csv_log, html_log))
            return

        # generate driver file and put in proc folder (folders[2])
        if driver_file == '': driver_file = DRIVER_FILE_DEFAULT
        driver_file = os.path.join(folders[2], driver_file)
        if os.path.exists(driver_file) and not args['overwrite'] and not args['rebuild_driver']:
            print('\nWARNING: driver file {} already exists so not saving; use --overwrite to overwrite or --rebuild-driver to rebuild'.format(driver_file))
        elif not (args['reduce_only'] or args['qa_only']):
            # prefer the saved csv log; fall back to re-scanning the data folder
            if os.path.exists(csv_log):
                dp = pandas.read_csv(csv_log)
            else:
                dp = batch.processFolder(folders[0])
                print('\nWARNING: could not find log file {}, this may be a problem later'.format(csv_log))
            if verbose: print('\nGenerating driver file and writing to {}'.format(driver_file))
            batch.writeDriver(dp, driver_file, data_folder=folders[0], verbose=verbose, check=True, create_folders=True)
            # query to pause and check driver
            if not args['no_pause'] and not args['driver_only']:
                input('\n\nCheck the DRIVER FILE {} and press return when you are ready to proceed, or type CNTL-C to abort...\n\n'.format(driver_file))
        if args['driver_only']:
            print('\n\nDriver file {} created.'.format(driver_file))
            return

        # reduction - only need the driver file for this
        ##
        ## NOTE - SOMETHING WEIRD HERE WHERE CALIBRATION FILE CREATION BREAKS IF VERBOSE IS NOT SET
        ##
        if not args['qa_only']:
            if not os.path.exists(driver_file):
                raise ValueError('Cannot find driver file {}'.format(driver_file))
            par = batch.readDriver(driver_file)

            # stop if there are no science or calibration files; science
            # entries are matched on their 'OBS_SET' key prefix
            if not any(k.startswith('OBS_SET') for k in par.keys()):
                print('No science files listed in the driver file {}; recheck this file before proceeding'.format(driver_file))
                return
            if 'CAL_SETS' not in par.keys():
                print('No calibration sets listed in the driver file {}; recheck this file before proceeding'.format(driver_file))
                return

            # add in additional keywords for specific reduction steps
            for flag, key in self._STEP_SWITCHES:
                if args[flag]: par[key] = False
            # --overwrite must turn overwriting ON (this was previously set
            # to False, contradicting the flag's help text)
            if args['overwrite']: par['OVERWRITE'] = True

            if verbose: print('\n\nReducing spectra\n\n')
            batch.batchReduce(par, verbose=verbose)
        if args['reduce_only']:
            print('\n\nReduction completed!\n\n')
            return

        # make qa page - needs both the driver file and the csv log
        if not os.path.exists(driver_file):
            raise ValueError('Cannot find driver file {}'.format(driver_file))
        if not os.path.exists(csv_log):
            raise ValueError('Cannot find log file {}'.format(csv_log))
        batch.makeQApage(driver_file, csv_log, verbose=verbose)
        print('\n\nQA page created, please review\n\n')
        return
# Run the command-line driver only when this file is executed as a script;
# constructing runBatch parses sys.argv and performs the requested stages.
if __name__ == "__main__":
    app = runBatch()