diff --git a/CHANGELOG.txt b/CHANGELOG.txt index e2f7ec33..738ba7a8 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -2,10 +2,11 @@ master ====== Major: +* MSNoise can now work with subdaily CCFs natively, from CC to dv/v! * added support for Location IDs: TODO + add documentation * this CHANGES the structure of the database !! TODO should we provide an "upgrade" tool? * changes to the schema: Station (and TODO Filter too) -* supprort for PostgreSQL databases. TODO: check if removing index_hints really affect performances (should not) +* support for PostgreSQL databases. TODO: check if removing index_hints really affect performances (should not) Changes: - msnoise diff --git a/msnoise/api.py b/msnoise/api.py index 4bc359e1..cfc4d2cc 100644 --- a/msnoise/api.py +++ b/msnoise/api.py @@ -293,11 +293,10 @@ def get_params(session): params.all_components = np.unique(params.components_to_compute_single_station + \ params.components_to_compute) - if params.mov_stack.count(',') == 0: - params.mov_stack = [int(params.mov_stack), ] + if params.mov_stack.count(',') == 1: + params.mov_stack = [params.mov_stack, ] else: - params.mov_stack = [int(mi) for mi in params.mov_stack.split(',')] - + params.mov_stack = params.mov_stack return params # FILTERS PART @@ -1133,7 +1132,7 @@ def export_allcorr(session, ccfid, data): df = pd.DataFrame().from_dict(data).T df.columns = get_t_axis(session) - df.to_hdf(os.path.join(path, date+'.h5'), 'data') + df.to_hdf(os.path.join(path, date+'.h5'), key='data') del df return @@ -1149,7 +1148,7 @@ def export_allcorr2(session, ccfid, data): df = pd.DataFrame().from_dict(data).T df.columns = get_t_axis(session) - df.to_hdf(os.path.join(path, date+'.h5'), 'data') + df.to_hdf(os.path.join(path, date+'.h5'), key='data') del df return @@ -1499,7 +1498,7 @@ def get_mwcs(session, station1, station2, filterid, components, date, """ TODO """ - file = os.path.join('MWCS', "%02i" % filterid, "%03i_DAYS" % mov_stack, + file = os.path.join('MWCS', "%02i" % filterid, "%s_%s" % (mov_stack[0], mov_stack[1]), components, "%s_%s" % (station1, station2), '%s.txt' % date) if os.path.isfile(file): @@ -1537,7 +1536,10 @@ def get_results_all(session, station1, station2, filterid, components, dates, station1, station2, components) results = [] for date in dates: - fname = os.path.join(path, date.strftime('%Y-%m-%d.h5')) + if isinstance(date, str): + fname = os.path.join(path, date+".h5") + else: + fname = os.path.join(path, date.strftime('%Y-%m-%d.h5')) if os.path.isfile(fname): df = pd.read_hdf(fname, 'data', parse_dates=True) df.index = pd.to_datetime(df.index) @@ -1553,6 +1555,7 @@ def get_results_all(session, station1, station2, filterid, components, dates, dr = xr.DataArray(result, coords=[times, taxis], dims=["times", "taxis"]).dropna("times", how="all") dr.name = "CCF" + dr = dr.sortby('times') return dr.to_dataset() else: return pd.DataFrame() @@ -1695,9 +1698,13 @@ def build_movstack_datelist(session): elif begin == "1970-01-01": # TODO this fails when the DA is empty start = session.query(DataAvailability).order_by( DataAvailability.starttime).first().starttime.date() - end = datetime.datetime.strptime(end, '%Y-%m-%d').date() else: start = datetime.datetime.strptime(begin, '%Y-%m-%d').date() + + if end == "2100-01-01": + end = session.query(DataAvailability).order_by( + DataAvailability.endtime.desc()).first().endtime.date() + else: end = datetime.datetime.strptime(end, '%Y-%m-%d').date() end = min(end, datetime.date.today()) datelist = pd.date_range(start, end).map(lambda x: x.date()) @@ -2186,7 +2193,7 @@ def get_dvv(session, filterid, components, dates, print("No Data for %s m%i f%i" % (components, mov_stack, filterid)) alldf = pd.concat(alldf) - print(mov_stack, alldf.head()) + # print(mov_stack, alldf.head()) if 'alldf' in locals(): errname = "E" + dttname alldf.to_csv("tt.csv") @@ -2215,7 +2222,7 @@ def get_dvv(session, filterid, components, dates, def xr_save_ccf(station1, station2, components, filterid, mov_stack, taxis, new, overwrite=False): path = os.path.join("STACKS2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, "%s" % components) + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components) fn = "%s_%s.nc" % (station1, station2) fullpath = os.path.join(path, fn) if overwrite: @@ -2230,7 +2237,7 @@ def xr_save_ccf(station1, station2, components, filterid, mov_stack, taxis, new, def xr_get_ccf(station1, station2, components, filterid, mov_stack, taxis): path = os.path.join("STACKS2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, "%s" % components) + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components) fn = "%s_%s.nc" % (station1, station2) fullpath = os.path.join(path, fn) @@ -2270,12 +2277,12 @@ def xr_get_ref(station1, station2, components, filterid, taxis): def xr_save_mwcs(station1, station2, components, filterid, mov_stack, taxis, dataframe): fn = os.path.join("MWCS2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components, "%s_%s.nc" % (station1, station2)) if not os.path.isdir(os.path.split(fn)[0]): os.makedirs(os.path.split(fn)[0]) - d = dataframe.stack().stack() + d = dataframe.stack(future_stack=True).stack(future_stack=True) d.index = d.index.set_names(["times", "keys", "taxis"]) d = d.reorder_levels(["times", "taxis", "keys"]) d.columns = ["MWCS"] @@ -2289,7 +2296,7 @@ def xr_save_mwcs(station1, station2, components, filterid, mov_stack, taxis, dat def xr_get_mwcs(station1, station2, components, filterid, mov_stack): fn = os.path.join("MWCS2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components, "%s_%s.nc" % (station1, station2)) if not os.path.isfile(fn): @@ -2315,12 +2322,12 @@ def xr_save_dtt(station1, station2, components, filterid, mov_stack, dataframe): * opened using the given file path, and the stacked data is inserted or updated in the file. The resulting dataset is then saved and the file is closed. """ fn = os.path.join("DTT2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components, "%s_%s.nc" % (station1, station2)) if not os.path.isdir(os.path.split(fn)[0]): os.makedirs(os.path.split(fn)[0]) - d = dataframe.stack() + d = dataframe.stack(future_stack=True) d.index = d.index.set_names(["times", "keys"]) d.columns = ["DTT"] dr = xr_create_or_open(fn, taxis=[], name="DTT") @@ -2343,7 +2350,7 @@ def xr_get_dtt(station1, station2, components, filterid, mov_stack): * result. """ fn = os.path.join("DTT2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components, "%s_%s.nc" % (station1, station2)) if not os.path.isfile(fn): @@ -2356,15 +2363,15 @@ def xr_get_dtt(station1, station2, components, filterid, mov_stack): def xr_save_dvv(components, filterid, mov_stack, dataframe): fn = os.path.join("DVV2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s.nc" % components) if not os.path.isdir(os.path.split(fn)[0]): os.makedirs(os.path.split(fn)[0]) if dataframe.columns.nlevels > 1: - d = dataframe.stack().stack() + d = dataframe.stack(future_stack=True).stack(future_stack=True) else: - d = dataframe.stack() + d = dataframe.stack(future_stack=True) level_names = ["times", "level1", "level0"] d.index = d.index.set_names(level_names[:d.index.nlevels]) @@ -2383,7 +2390,7 @@ def xr_save_dvv(components, filterid, mov_stack, dataframe): def xr_get_dvv(components, filterid, mov_stack): fn = os.path.join("DVV2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s.nc" % components) if not os.path.isfile(fn): # logging.error("FILE DOES NOT EXIST: %s, skipping" % fn) @@ -2404,7 +2411,7 @@ def wavg(group, dttname, errname): """ d = group[dttname] - group[errname][group[errname] == 0] = 1e-6 + group.loc[group[errname] == 0,errname] = 1e-6 w = 1. / group[errname] try: wavg = (d * w).sum() / w.sum() @@ -2434,7 +2441,7 @@ def wstd(group, dttname, errname): Note: This method uses the `np` module from NumPy. """ d = group[dttname] - group[errname][group[errname] == 0] = 1e-6 + group.loc[group[errname] == 0,errname] = 1e-6 w = 1. / group[errname] wavg = (d * w).sum() / w.sum() N = len(np.nonzero(w.values)[0]) diff --git a/msnoise/default.csv b/msnoise/default.csv index 3a0ed408..5890e747 100644 --- a/msnoise/default.csv +++ b/msnoise/default.csv @@ -10,7 +10,7 @@ enddate,2100-01-01,End Date to process: [2100-01-01]='No end',str,,, analysis_duration,86400,Duration of the Analysis in seconds (this shouldn't be changed for now),float,,, cc_sampling_rate,20,Sampling Rate for the CrossCorrelation (in Hz),float,,, cc_normalisation,NO,"Normalisation method for individual `corr_duration`) CCFs, default is not normalized (NO), but can be normalized based on the power of the two traces (POW), or by the maximum (MAX) or absolute maximum (ABSMAX) of the CCF",str,NO/POW/MAX/ABSMAX,, -resampling_method,Lanczos,Resampling method,str,Lanczos/Decimate,, +resampling_method,Decimate,Resampling method,str,Lanczos/Decimate,, preprocess_lowpass,8,Preprocessing Low-pass value (in Hz),float,,, preprocess_highpass,0.01,Preprocessing High-pass value (in Hz),float,,, preprocess_max_gap,10,Preprocessing maximum gap length that will be filled by interpolation (in seconds),float,,, @@ -35,11 +35,11 @@ cc_type,CC,Cross-Correlation type,str,,, components_to_compute_single_station,,"List (comma separated) of components within a single station. ZZ would be the autocorrelation of Z component, while ZE or ZN are the cross-components. Defaults to [], no single-station computations are done.",str,,, cc_type_single_station_AC,CC,Auto-Correlation type,str,,, cc_type_single_station_SC,CC,Cross-Correlation type for Cross-Components,str,,, -keep_all,N,Keep all cross-corr (length: ``corr_duration``),bool,Y/N,, +keep_all,Y,Keep all cross-corr (length: ``corr_duration``),bool,Y/N,, keep_days,Y,Keep all daily cross-corr,bool,Y/N,, ref_begin,1970-01-01,Beginning or REF stacks. Can be absolute (2012-01-01) or relative (-100) days,str,,, ref_end,2100-01-01,End or REF stacks. Same as ``ref_begin``,str,,, -mov_stack,5,"Number of days to stack for the Moving-window stacks ([5]= [day-4:day]), can be a comma-separated list 1,2,5,10",str,,, +mov_stack,"(('1d','1d'))","A list of two parameters: the time to ""roll"" over (default 1 day) and the granularity (step) of the resulting stacked CCFs (default 1 day) to stack for the Moving-window stacks. This can be a list of tuples, e.g. (('1d','1d'),('2d','1d')) corresponds to the MSNoise 1.6 ""1,2"" before. Time deltas can be anything pandas can interpret (""d"", ""min"", ""sec"", etc).",eval,,, export_format,MSEED,Export stacks in which format(s) ?,str,SAC/MSEED/BOTH,, sac_format,doublets,Format for SAC stacks ?,str,doublets/clarke,, dtt_lag,static,How is the lag window defined,str,dynamic/static,, diff --git a/msnoise/plots/ccftime.py b/msnoise/plots/ccftime.py index 090217bf..003c7372 100644 --- a/msnoise/plots/ccftime.py +++ b/msnoise/plots/ccftime.py @@ -45,12 +45,14 @@ from ..api import * -def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, seismic=False, +def main(sta1, sta2, filterid, components, mov_stackid=1, ampli=5, seismic=False, show=False, outfile=None, envelope=False, refilter=None, normalize=None, loglevel="INFO", **kwargs): logger = get_logger('msnoise.cc_plot_ccftime', loglevel, with_pid=True) db = connect() + params = get_params(db) + mov_stack = params.mov_stack[mov_stackid-1] maxlag = float(get_config(db, 'maxlag')) samples = get_maxlag_samples(db) cc_sampling_rate = float(get_config(db, 'cc_sampling_rate')) @@ -76,8 +78,8 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, seismic=False, pair = "%s:%s" % (sta1, sta2) - logger.info("Fetching CCF data for %s-%s-%i-%i" % (pair, components, filterid, - mov_stack)) + logger.info("Fetching CCF data for %s-%s-%i-%s" % (pair, components, filterid, + str(mov_stack))) try: stack_total = xr_get_ccf(sta1, sta2, components, filterid, mov_stack, taxis) except FileNotFoundError as fullpath: @@ -118,9 +120,9 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, seismic=False, plt.xlabel("Lag Time (s)") plt.axhline(0, lw=0.5, c='k') plt.grid() - title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %d' %\ + title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %i (%s_%s)' %\ (sta1, sta2, components, - filterid, low, high, mov_stack) + filterid, low, high, mov_stackid, mov_stack[0], mov_stack[1]) if refilter: title += ", Re-filtered (%.2f - %.2f Hz)" % (freqmin, freqmax) plt.title(title) @@ -138,10 +140,11 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, seismic=False, if outfile: if outfile.startswith("?"): pair = pair.replace(':', '-') - outfile = outfile.replace('?', '%s-%s-f%i-m%i' % (pair, + outfile = outfile.replace('?', '%s-%s-f%i-m%s_%s' % (pair, components, filterid, - mov_stack)) + mov_stack[0], + mov_stack[1])) outfile = "ccftime " + outfile logger.info("output to: %s" % outfile) plt.savefig(outfile) diff --git a/msnoise/plots/dvv.py b/msnoise/plots/dvv.py index b6ce9817..fd1f7195 100644 --- a/msnoise/plots/dvv.py +++ b/msnoise/plots/dvv.py @@ -20,7 +20,7 @@ from ..api import * -def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, +def main(mov_stackid=None, dttname="M", components='ZZ', filterid=1, pairs=[], showALL=False, show=False, outfile=None, loglevel="INFO"): logger = get_logger('msnoise.cc_dvv_plot_dvv', loglevel, with_pid=True) @@ -28,8 +28,10 @@ def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, params = get_params(db) start, end, datelist = build_movstack_datelist(db) - if mov_stack and mov_stack != 0: + if mov_stackid and mov_stackid != "": + mov_stack = params.mov_stack[mov_stackid - 1] mov_stacks = [mov_stack, ] + print(mov_stack) else: mov_stacks = params.mov_stack @@ -37,7 +39,7 @@ def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, components = components.split(",") else: components = [components, ] - + print(mov_stacks) low = high = 0.0 filter = get_filters(db, ref=filterid) low = float(filter.low) @@ -52,7 +54,7 @@ def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, plt.sca(axes[i]) except: plt.sca(axes) - plt.title('%i Days Moving Window' % mov_stack) + # plt.title('%s Moving Window' % mov_stack) for comps in components: try: dvv = xr_get_dvv(comps, filterid, mov_stack) @@ -69,9 +71,9 @@ def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=4, ncol=2, borderaxespad=0.) left, right = dvv.index[0], dvv.index[-1] - plt.title('1 Day') + plt.title("Stack %i (%s_%s)"% (mov_stackid or i+1, mov_stack[0], mov_stack[1])) else: - plt.title('%i Days Moving Window' % mov_stack) + plt.title("Stack %i (%s_%s)"% (i+1, mov_stack[0], mov_stack[1])) plt.xlim(left, right) plt.grid(True) @@ -84,9 +86,10 @@ def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, if outfile: if outfile.startswith("?"): if len(mov_stacks) == 1: - outfile = outfile.replace('?', '%s-f%i-m%i-M%s' % (components, + outfile = outfile.replace('?', '%s-f%i-m%s_%s-M%s' % (components, filterid, - mov_stack, + mov_stack[0], + mov_stack[1], dttname)) else: outfile = outfile.replace('?', '%s-f%i-M%s' % (components, diff --git a/msnoise/plots/interferogram.py b/msnoise/plots/interferogram.py index 0cea61c7..d0943d94 100644 --- a/msnoise/plots/interferogram.py +++ b/msnoise/plots/interferogram.py @@ -26,7 +26,7 @@ from ..api import * -def main(sta1, sta2, filterid, components, mov_stack=1, show=True, +def main(sta1, sta2, filterid, components, mov_stackid=1, show=True, outfile=None, refilter=None, loglevel="INFO", **kwargs): logger = get_logger('msnoise.cc_plot_interferogram', loglevel, with_pid=True) @@ -51,8 +51,14 @@ def main(sta1, sta2, filterid, components, mov_stack=1, show=True, pair = "%s:%s" % (sta1, sta2) - logger.info("Fetching CCF data for %s-%s-%i-%i" % (pair, components, filterid, + params = get_params(db) + mov_stack = params.mov_stack[mov_stackid - 1] + # print(mov_stack) + + logger.info("Fetching CCF data for %s-%s-%i-%s" % (pair, components, filterid, mov_stack)) + + try: data = xr_get_ccf(sta1, sta2, components, filterid, mov_stack, taxis) except FileNotFoundError as fullpath: @@ -89,9 +95,9 @@ def main(sta1, sta2, filterid, components, mov_stack=1, show=True, else: plt.ylim(-maxlag, maxlag) - title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %d' % \ - (sta1.replace('_', '.'), sta2.replace('_', '.'), components, - filterid, low, high, mov_stack) + title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %i (%s_%s)' % \ + (sta1, sta2, components, + filterid, low, high, mov_stackid, mov_stack[0], mov_stack[1]) if refilter: title += ", Re-filtered (%.2f - %.2f Hz)" % (freqmin, freqmax) plt.title(title) @@ -101,10 +107,11 @@ def main(sta1, sta2, filterid, components, mov_stack=1, show=True, if outfile: if outfile.startswith("?"): pair = pair.replace(':', '-') - outfile = outfile.replace('?', '%s-%s-f%i-m%i' % (pair, + outfile = outfile.replace('?', '%s-%s-f%i-m%s_%s' % (pair, components, filterid, - mov_stack)) + mov_stack[0], + mov_stack[1])) outfile = "interferogram " + outfile logger.info("output to: %s" % outfile) plt.savefig(outfile) diff --git a/msnoise/plots/mwcs.py b/msnoise/plots/mwcs.py index e623863d..29d1a6b7 100644 --- a/msnoise/plots/mwcs.py +++ b/msnoise/plots/mwcs.py @@ -72,8 +72,9 @@ def plot_lags(minlag, maxlag): maxlag2 = minlag + dtt_width - - logger.info("Fetching CCF data for %s-%s-%i-%i" % (pair, components, filterid, + params = get_params(db) + mov_stack = params.mov_stack[mov_stack-1] + logger.info("Fetching CCF data for %s-%s-%i-%s" % (pair, components, filterid, mov_stack)) id = [] @@ -99,7 +100,7 @@ def plot_lags(minlag, maxlag): alldt = mwcs.M allcoh = mwcs.MCOH id = mwcs.index - print(mwcs.M) + # print(mwcs.M) alldt = alldt.resample('D').mean() allcoh = allcoh.resample('D').mean() @@ -161,7 +162,7 @@ def plot_lags(minlag, maxlag): plt.xlabel('Coherence') plt.ylabel("Lag Time (s)") - name = '%s-%s f%i m%i' % (sta1, sta2, filterid, mov_stack) + name = '%s-%s f%i m%s' % (sta1, sta2, filterid, mov_stack) name = name.replace('_', '.') plt.suptitle(name) @@ -169,10 +170,11 @@ def plot_lags(minlag, maxlag): if outfile: if outfile.startswith("?"): pair = pair.replace(':', '-') - outfile = outfile.replace('?', '%s-%s-f%i-m%i' % (pair, + outfile = outfile.replace('?', '%s-%s-f%i-m%s_%s' % (pair, components, filterid, - mov_stack)) + mov_stack[0], + mov_stack[1])) outfile = "mwcs " + outfile logger.info("output to: %s" % outfile) plt.savefig(outfile) diff --git a/msnoise/plots/spectime.py b/msnoise/plots/spectime.py index 987300fe..0395e2eb 100644 --- a/msnoise/plots/spectime.py +++ b/msnoise/plots/spectime.py @@ -45,10 +45,10 @@ from msnoise.api import build_movstack_datelist, connect, get_config, \ get_filters, get_results, check_stations_uniqueness, xr_get_ccf,\ - get_t_axis, get_logger + get_t_axis, get_logger, get_params -def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, show=False, +def main(sta1, sta2, filterid, components, mov_stackid=1, ampli=5, show=False, outfile=False, refilter=None, startdate=None, enddate=None, loglevel="INFO", **kwargs): logger = get_logger('msnoise.cc_plot_spectime', loglevel, @@ -75,8 +75,9 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, show=False, sta2 = check_stations_uniqueness(db, sta2) pair = "%s:%s" % (sta1, sta2) - - logger.info("Fetching CCF data for %s-%s-%i-%i" % (pair, components, filterid, + params = get_params(db) + mov_stack = params.mov_stack[mov_stackid-1] + logger.info("Fetching CCF data for %s-%s-%i-%s" % (pair, components, filterid, mov_stack)) stack_total = xr_get_ccf(sta1, sta2, components, filterid, mov_stack, taxis) @@ -116,9 +117,9 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, show=False, ax.set_xscale('log') ax.grid() - title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %d' %\ - (sta1.replace('_', '.'), sta2.replace('_', '.'), components, - filterid, low, high, mov_stack) + title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %i (%s_%s)' % \ + (sta1, sta2, components, + filterid, low, high, mov_stackid, mov_stack[0], mov_stack[1]) if refilter: title += ", Re-filtered (%.2f - %.2f Hz)" % (freqmin, freqmax) ax.set_title(title) @@ -127,10 +128,11 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, show=False, if outfile: if outfile.startswith("?"): pair = pair.replace(':', '-') - outfile = outfile.replace('?', '%s-%s-f%i-m%i' % (pair, + outfile = outfile.replace('?', '%s-%s-f%i-m%s_%s' % (pair, components, filterid, - mov_stack)) + mov_stack[0], + mov_stack[1])) outfile = "spectime " + outfile logger.info("output to: %s" % outfile) plt.savefig(outfile) diff --git a/msnoise/preprocessing.py b/msnoise/preprocessing.py index bdbbe07e..44790367 100644 --- a/msnoise/preprocessing.py +++ b/msnoise/preprocessing.py @@ -100,7 +100,7 @@ def preprocess(stations, comps, goal_day, params, responses=None, loglevel="INFO net, sta, loc = station.split('.') gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d') files = get_data_availability( - db, net=net, sta=sta, loc=loc, starttime=gd, endtime=gd) + db, net=net, sta=sta, loc=loc, starttime=gd, endtime=gd + datetime.timedelta(seconds=86401)) for comp in comps: datafiles[station][comp] = [] for file in files: diff --git a/msnoise/s03compute_no_rotation.py b/msnoise/s03compute_no_rotation.py index 4bfdd21f..0fd042d9 100644 --- a/msnoise/s03compute_no_rotation.py +++ b/msnoise/s03compute_no_rotation.py @@ -415,8 +415,11 @@ def main(loglevel="INFO"): # psds[iZ] = mm # define pairwise CCs + # TODO document that MASSIVE change !! + tmptime = tmp[0].stats.starttime.datetime thisdate = tmptime.strftime("%Y-%m-%d") + tmptime = tmp[0].stats.endtime.datetime thistime = tmptime.strftime("%Y-%m-%d %H:%M:%S") # Standard operator for CC diff --git a/msnoise/s04_stack2.py b/msnoise/s04_stack2.py index f7b41a7d..143bf568 100644 --- a/msnoise/s04_stack2.py +++ b/msnoise/s04_stack2.py @@ -103,7 +103,7 @@ from .api import * import logbook - +import matplotlib.pyplot as plt def main(stype, interval=1.0, loglevel="INFO"): """Computes the REF/MOV stacks. @@ -127,8 +127,8 @@ def main(stype, interval=1.0, loglevel="INFO"): taxis = get_t_axis(db) mov_stacks = params.mov_stack - if 1 in mov_stacks: - mov_stacks.remove(1) # remove 1 day stack, it will be done automatically + # if 1 in mov_stacks: + # mov_stacks.remove(1) # remove 1 day stack, it will be done automatically filters = get_filters(db, all=False) while is_dtt_next_job(db, flag='T', jobtype='STACK'): @@ -156,11 +156,10 @@ def main(stype, interval=1.0, loglevel="INFO"): for components in components_to_compute: logger.info('Processing %s-%s-%i' % (pair, components, filterid)) - - c = get_results(db, sta1, sta2, filterid, components, days, - mov_stack=1, format="xarray") - dr = xr_save_ccf(sta1, sta2, components, filterid, 1, taxis, c) - + c = get_results_all(db, sta1, sta2, filterid, components, days, format="xarray") + # print(c) + # dr = xr_save_ccf(sta1, sta2, components, filterid, 1, taxis, c) + dr = c if stype == "ref": start, end, datelist = build_ref_datelist(db) start = np.array(start, dtype=np.datetime64) @@ -171,14 +170,30 @@ def main(stype, interval=1.0, loglevel="INFO"): _ = _.mean(dim="times") xr_save_ref(sta1, sta2, components, filterid, taxis, _) continue - + dr = dr.resample(times="%is" % params.corr_duration).mean() for mov_stack in mov_stacks: - if mov_stack > len(dr.times): - logger.error("not enough data for mov_stack=%i" % mov_stack) - continue - # TODO avoid the 1D resampler here - xx = dr.resample(times='1D').mean().rolling( - times=mov_stack, min_periods=1).mean().dropna("times", how="all") + # if mov_stack > len(dr.times): + # logger.error("not enough data for mov_stack=%i" % mov_stack) + # continue + mov_rolling, mov_sample = mov_stack + # print(mov_rolling, mov_sample) + + if mov_rolling == mov_sample: + # just resample & mean + xx = dr.resample(times=mov_sample, label="right", skipna=True).mean().dropna("times", + how="all") + else: + mov_rolling = pd.to_timedelta(mov_rolling).total_seconds() + # print("Will roll over %i seconds" % mov_rolling) + duration_to_windows = mov_rolling / params.corr_duration + if not duration_to_windows.is_integer(): + print("Warning, rounding down the number of windows to roll over") + duration_to_windows = int(max(1, math.floor(duration_to_windows))) + # print("Which is %i windows of %i seconds duration" % (duration_to_windows, params.corr_duration)) + + xx = dr.rolling(times=duration_to_windows, min_periods=1).mean() + xx = xx.resample(times=mov_sample, label="right", skipna=True).asfreq().dropna("times", how="all") + xr_save_ccf(sta1, sta2, components, filterid, mov_stack, taxis, xx, overwrite=True) del xx if stype != "ref": diff --git a/msnoise/s05compute_mwcs2.py b/msnoise/s05compute_mwcs2.py index 7f627d01..4cc2c457 100644 --- a/msnoise/s05compute_mwcs2.py +++ b/msnoise/s05compute_mwcs2.py @@ -71,6 +71,7 @@ .. versionadded:: 1.4 Parallel Processing """ +import pandas as pd from .api import * from .move2obspy import mwcs @@ -191,10 +192,16 @@ def ww(a): except FileNotFoundError as fullpath: logger.error("FILE DOES NOT EXIST: %s, skipping" % fullpath) continue - logger.debug("Processing %s:%s f%i m%i %s" % (station1, station2, filterid, mov_stack, components)) - todo = data.index.intersection(pd.to_datetime(days)) - data = data.loc[todo] + logger.debug("Processing %s:%s f%i m%s %s" % (station1, station2, filterid, mov_stack, components)) + # todo = data.index.intersection() + # data = data.loc[todo] + + to_search = pd.to_datetime(days) + to_search = to_search.append(pd.DatetimeIndex([to_search[-1]+pd.Timedelta("1d"),])) + # data = data[(data.index.floor('d').isin(to_search) or data.index.ceil('d').isin(to_search))] + data = data[data.index.floor('d').isin(to_search)] data = data.dropna() + # print("Whitening %s" % fn) # data = pd.DataFrame(data) # data = data.apply(ww, axis=1, result_type="broadcast") @@ -323,7 +330,6 @@ def ww(a): del X del M, E, MCOH output = pd.concat(output, axis=1) - # output.to_csv(fn.replace('.nc','.csv')) xr_save_mwcs(station1, station2, components, filterid, mov_stack, taxis, output) diff --git a/msnoise/s06compute_dtt2.py b/msnoise/s06compute_dtt2.py index b9849c81..9f81a4fb 100644 --- a/msnoise/s06compute_dtt2.py +++ b/msnoise/s06compute_dtt2.py @@ -81,10 +81,16 @@ def main(interval=1, loglevel="INFO"): logger.error("FILE DOES NOT EXIST: %s, skipping" % fullpath) continue - todo = mwcs.index.intersection(pd.to_datetime(days)) - mwcs = mwcs.loc[todo] + # todo = mwcs.index.intersection(pd.to_datetime(days)) + # mwcs = mwcs.loc[todo] + # mwcs = mwcs.dropna() + + to_search = pd.to_datetime(days) + to_search = to_search.append(pd.DatetimeIndex([to_search[-1] + pd.Timedelta("1d"), ])) + mwcs = mwcs[mwcs.index.floor('d').isin(to_search)] mwcs = mwcs.dropna() + M = mwcs.xs("M", level=0, axis=1).copy() EM = mwcs.xs("EM", level=0, axis=1).copy() MCOH = mwcs.xs("MCOH", level=0, axis=1).copy() diff --git a/msnoise/s07_compute_dvv.py b/msnoise/s07_compute_dvv.py index cf41e6e5..480ef230 100644 --- a/msnoise/s07_compute_dvv.py +++ b/msnoise/s07_compute_dvv.py @@ -24,13 +24,13 @@ def main(interval=1, loglevel="INFO"): for mov_stack in mov_stacks: for components in params.all_components: - logger.debug("Processing f%i m%i %s" % (filterid, mov_stack, components)) + logger.debug("Processing f%i m%s %s" % (filterid, mov_stack, components)) try: dvv = compute_dvv(db, filterid, mov_stack, pairs=None, components=components, params=params) except ValueError: traceback.print_exc() - logger.error("No data for f%i m%i: %s" % (filterid, mov_stack, components)) + logger.error("No data for f%i m%s: %s" % (filterid, mov_stack, components)) continue xr_save_dvv(components, filterid, mov_stack, dvv) del dvv @@ -38,7 +38,7 @@ def main(interval=1, loglevel="INFO"): dvv = compute_dvv(db, filterid, mov_stack, pairs=None, components=None, params=params) except ValueError: - logger.error("No data for any component: f%i m%i" % (filterid, mov_stack)) + logger.error("No data for any component: f%i m%s" % (filterid, mov_stack)) continue xr_save_dvv("ALL", filterid, mov_stack, dvv) del dvv diff --git a/msnoise/stretch2.py b/msnoise/stretch2.py index a87d5251..2f0f0a6c 100644 --- a/msnoise/stretch2.py +++ b/msnoise/stretch2.py @@ -183,7 +183,7 @@ def main(loglevel="INFO"): refs, days = zip(*[[job.ref, job.day] for job in jobs]) logger.info( - "There are MWCS jobs for some days to recompute for %s" % pair) + "There are MWCS (stretching) jobs for some days to recompute for %s" % pair) for f in filters: filterid = int(f.ref) freqmin = f.mwcs_low @@ -239,17 +239,18 @@ def ww(a): allcoefs = [] allerrs = [] - fn = r"STACKS2/%02i/%03i_DAYS/%s/%s_%s.nc" % ( - filterid, mov_stack, components, station1, station2) + fn = r"STACKS2/%02i/%s_%s/%s/%s_%s.nc" % ( + filterid, mov_stack[0], mov_stack[1], components, station1, station2) print("Reading %s" % fn) if not os.path.isfile(fn): print("FILE DOES NOT EXIST: %s, skipping" % fn) continue data = xr_create_or_open(fn) data = data.CCF.to_dataframe().unstack().droplevel(0, axis=1) - valid = data.index.intersection(pd.to_datetime(days)) - data = data.loc[valid] + to_search = pd.to_datetime(days) + data = data[data.index.floor('d').isin(to_search)] data = data.dropna() + # print("Whitening %s" % fn) # data = pd.DataFrame(data) # data = data.apply(ww, axis=1, result_type="broadcast") @@ -300,8 +301,8 @@ def gauss_function(x, a, x0, sigma): np.array([alldeltas, allcoefs, allerrs]).T, index=alldays, columns=["Delta", "Coeff", "Error"], ) output = os.path.join('STR2', "%02i" % filterid, - "%03i_DAYS" % mov_stack, components) - print(df.head()) + "%s_%s" % (mov_stack[0],mov_stack[1]), components) + # print(df.head()) if not os.path.isdir(output): os.makedirs(output) fn = os.path.join(output, "%s.csv" % ref_name) diff --git a/msnoise/test/tests.py b/msnoise/test/tests.py index c20640a5..e5d595b0 100644 --- a/msnoise/test/tests.py +++ b/msnoise/test/tests.py @@ -320,7 +320,7 @@ def test_023_stack(self): update_config(db, 'ref_end', '2011-01-01') update_config(db, 'startdate', '2009-01-01') update_config(db, 'enddate', '2011-01-01') - update_config(db, 'mov_stack', '1,2,5') + update_config(db, 'mov_stack', "(('1d','1d'),('2d','1d'),('5d','1d'))") interval = 1. main('ref', interval) @@ -430,8 +430,8 @@ def test_100_plot_cctfime(self): for filter in get_filters(db): main(sta1, sta2, filter.ref, "ZZ", 1, show=False, outfile="?.png") - fn = 'ccftime %s-%s-f%i-m%i.png' % \ - ("%s-%s" % (sta1, sta2), "ZZ", filter.ref, 1) + fn = 'ccftime %s-%s-f%i-m%s_%s.png' % \ + ("%s-%s" % (sta1, sta2), "ZZ", filter.ref, "1d", "1d") self.assertTrue(os.path.isfile(fn), msg="%s doesn't exist" % fn) def test_100_plot_interferogram(self): @@ -446,10 +446,10 @@ def test_100_plot_interferogram(self): for filter in get_filters(db): main(sta1, sta2, filter.ref, "ZZ", 1, show=False, outfile="?.png") - fn = 'interferogram %s-%s-f%i-m%i.png' % \ + fn = 'interferogram %s-%s-f%i-m%s_%s.png' % \ ("%s-%s" % (sta1, sta2), - "ZZ", filter.ref, 1) + "ZZ", filter.ref, "1d", "1d") self.assertTrue(os.path.isfile(fn), msg="%s doesn't exist" % fn) def test_101_plot_spectime(self): @@ -464,10 +464,10 @@ def test_101_plot_spectime(self): for filter in get_filters(db): main(sta1, sta2, filter.ref, "ZZ", 1, show=False, outfile="?.png") - fn = 'spectime %s-%s-f%i-m%i.png' % \ + fn = 'spectime %s-%s-f%i-m%s_%s.png' % \ ("%s-%s" % (sta1, sta2), - "ZZ", filter.ref, 1) + "ZZ", filter.ref, "1d", "1d") self.assertTrue(os.path.isfile(fn), msg="%s doesn't exist" % fn) def test_102_plot_distance(self):