From e174246c4097b11666e20d3ca2342e69d007dd0e Mon Sep 17 00:00:00 2001 From: Thomas Lecocq Date: Thu, 29 Feb 2024 09:16:15 +0100 Subject: [PATCH] Sub-daily CCFs are now default I've modified the msnoise workflow to allow sub-daily mov_stacks. This means: * keep_all is now default **Y** * The stack method will read ALL the CCFs of corr_duration length and apply: 1/ a rolling mean (for now, can be adapted later to apply other stack methods) of a given duration (e.g. "1d" (d=day in pandas vocabulary)) and 2/ a resampling of (possibly) other duration (e.g. "12h"). This results in the STACKS' netcdf files containing 2 CCFs per day, and all subsequent steps have been adapted to work with any number of indices present in the netcdf files. There might still be some small issues of missing CCFs at the edge of days (or the last window etc), to check. This introduces a solid backward incompatibility, in the parameters, the code & the file structure. --- CHANGELOG.txt | 3 +- msnoise/api.py | 55 +++++++++++++++++-------------- msnoise/default.csv | 6 ++-- msnoise/plots/ccftime.py | 17 ++++++---- msnoise/plots/dvv.py | 19 ++++++----- msnoise/plots/interferogram.py | 21 ++++++++---- msnoise/plots/mwcs.py | 14 ++++---- msnoise/plots/spectime.py | 20 ++++++----- msnoise/preprocessing.py | 2 +- msnoise/s03compute_no_rotation.py | 3 ++ msnoise/s04_stack2.py | 45 ++++++++++++++++--------- msnoise/s05compute_mwcs2.py | 14 +++++--- msnoise/s06compute_dtt2.py | 10 ++++-- msnoise/s07_compute_dvv.py | 6 ++-- msnoise/stretch2.py | 15 +++++---- msnoise/test/tests.py | 14 ++++---- 16 files changed, 160 insertions(+), 104 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index e2f7ec33..738ba7a8 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -2,10 +2,11 @@ master ====== Major: +* MSNoise can now work with subdaily CCFs natively, from CC to dv/v! * added support for Location IDs: TODO + add documentation * this CHANGES the structure of the database !!
TODO should we provide an "upgrade" tool? * changes to the schema: Station (and TODO Filter too) -* supprort for PostgreSQL databases. TODO: check if removing index_hints really affect performances (should not) +* support for PostgreSQL databases. TODO: check if removing index_hints really affect performances (should not) Changes: - msnoise diff --git a/msnoise/api.py b/msnoise/api.py index 4bc359e1..cfc4d2cc 100644 --- a/msnoise/api.py +++ b/msnoise/api.py @@ -293,11 +293,10 @@ def get_params(session): params.all_components = np.unique(params.components_to_compute_single_station + \ params.components_to_compute) - if params.mov_stack.count(',') == 0: - params.mov_stack = [int(params.mov_stack), ] + if params.mov_stack.count(',') == 1: + params.mov_stack = [params.mov_stack, ] else: - params.mov_stack = [int(mi) for mi in params.mov_stack.split(',')] - + params.mov_stack = params.mov_stack return params # FILTERS PART @@ -1133,7 +1132,7 @@ def export_allcorr(session, ccfid, data): df = pd.DataFrame().from_dict(data).T df.columns = get_t_axis(session) - df.to_hdf(os.path.join(path, date+'.h5'), 'data') + df.to_hdf(os.path.join(path, date+'.h5'), key='data') del df return @@ -1149,7 +1148,7 @@ def export_allcorr2(session, ccfid, data): df = pd.DataFrame().from_dict(data).T df.columns = get_t_axis(session) - df.to_hdf(os.path.join(path, date+'.h5'), 'data') + df.to_hdf(os.path.join(path, date+'.h5'), key='data') del df return @@ -1499,7 +1498,7 @@ def get_mwcs(session, station1, station2, filterid, components, date, """ TODO """ - file = os.path.join('MWCS', "%02i" % filterid, "%03i_DAYS" % mov_stack, + file = os.path.join('MWCS', "%02i" % filterid, "%s_%s" % (mov_stack[0], mov_stack[1]), components, "%s_%s" % (station1, station2), '%s.txt' % date) if os.path.isfile(file): @@ -1537,7 +1536,10 @@ def get_results_all(session, station1, station2, filterid, components, dates, station1, station2, components) results = [] for date in dates: - fname = os.path.join(path, 
date.strftime('%Y-%m-%d.h5')) + if isinstance(date, str): + fname = os.path.join(path, date+".h5") + else: + fname = os.path.join(path, date.strftime('%Y-%m-%d.h5')) if os.path.isfile(fname): df = pd.read_hdf(fname, 'data', parse_dates=True) df.index = pd.to_datetime(df.index) @@ -1553,6 +1555,7 @@ def get_results_all(session, station1, station2, filterid, components, dates, dr = xr.DataArray(result, coords=[times, taxis], dims=["times", "taxis"]).dropna("times", how="all") dr.name = "CCF" + dr = dr.sortby('times') return dr.to_dataset() else: return pd.DataFrame() @@ -1695,9 +1698,13 @@ def build_movstack_datelist(session): elif begin == "1970-01-01": # TODO this fails when the DA is empty start = session.query(DataAvailability).order_by( DataAvailability.starttime).first().starttime.date() - end = datetime.datetime.strptime(end, '%Y-%m-%d').date() else: start = datetime.datetime.strptime(begin, '%Y-%m-%d').date() + + if end == "2100-01-01": + end = session.query(DataAvailability).order_by( + DataAvailability.endtime.desc()).first().endtime.date() + else: end = datetime.datetime.strptime(end, '%Y-%m-%d').date() end = min(end, datetime.date.today()) datelist = pd.date_range(start, end).map(lambda x: x.date()) @@ -2186,7 +2193,7 @@ def get_dvv(session, filterid, components, dates, print("No Data for %s m%i f%i" % (components, mov_stack, filterid)) alldf = pd.concat(alldf) - print(mov_stack, alldf.head()) + # print(mov_stack, alldf.head()) if 'alldf' in locals(): errname = "E" + dttname alldf.to_csv("tt.csv") @@ -2215,7 +2222,7 @@ def get_dvv(session, filterid, components, dates, def xr_save_ccf(station1, station2, components, filterid, mov_stack, taxis, new, overwrite=False): path = os.path.join("STACKS2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, "%s" % components) + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components) fn = "%s_%s.nc" % (station1, station2) fullpath = os.path.join(path, fn) if overwrite: @@ -2230,7 +2237,7 @@ def xr_save_ccf(station1, 
station2, components, filterid, mov_stack, taxis, new, def xr_get_ccf(station1, station2, components, filterid, mov_stack, taxis): path = os.path.join("STACKS2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, "%s" % components) + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components) fn = "%s_%s.nc" % (station1, station2) fullpath = os.path.join(path, fn) @@ -2270,12 +2277,12 @@ def xr_get_ref(station1, station2, components, filterid, taxis): def xr_save_mwcs(station1, station2, components, filterid, mov_stack, taxis, dataframe): fn = os.path.join("MWCS2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components, "%s_%s.nc" % (station1, station2)) if not os.path.isdir(os.path.split(fn)[0]): os.makedirs(os.path.split(fn)[0]) - d = dataframe.stack().stack() + d = dataframe.stack(future_stack=True).stack(future_stack=True) d.index = d.index.set_names(["times", "keys", "taxis"]) d = d.reorder_levels(["times", "taxis", "keys"]) d.columns = ["MWCS"] @@ -2289,7 +2296,7 @@ def xr_save_mwcs(station1, station2, components, filterid, mov_stack, taxis, dat def xr_get_mwcs(station1, station2, components, filterid, mov_stack): fn = os.path.join("MWCS2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components, "%s_%s.nc" % (station1, station2)) if not os.path.isfile(fn): @@ -2315,12 +2322,12 @@ def xr_save_dtt(station1, station2, components, filterid, mov_stack, dataframe): * opened using the given file path, and the stacked data is inserted or updated in the file. The resulting dataset is then saved and the file is closed. 
""" fn = os.path.join("DTT2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components, "%s_%s.nc" % (station1, station2)) if not os.path.isdir(os.path.split(fn)[0]): os.makedirs(os.path.split(fn)[0]) - d = dataframe.stack() + d = dataframe.stack(future_stack=True) d.index = d.index.set_names(["times", "keys"]) d.columns = ["DTT"] dr = xr_create_or_open(fn, taxis=[], name="DTT") @@ -2343,7 +2350,7 @@ def xr_get_dtt(station1, station2, components, filterid, mov_stack): * result. """ fn = os.path.join("DTT2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s" % components, "%s_%s.nc" % (station1, station2)) if not os.path.isfile(fn): @@ -2356,15 +2363,15 @@ def xr_get_dtt(station1, station2, components, filterid, mov_stack): def xr_save_dvv(components, filterid, mov_stack, dataframe): fn = os.path.join("DVV2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s.nc" % components) if not os.path.isdir(os.path.split(fn)[0]): os.makedirs(os.path.split(fn)[0]) if dataframe.columns.nlevels > 1: - d = dataframe.stack().stack() + d = dataframe.stack(future_stack=True).stack(future_stack=True) else: - d = dataframe.stack() + d = dataframe.stack(future_stack=True) level_names = ["times", "level1", "level0"] d.index = d.index.set_names(level_names[:d.index.nlevels]) @@ -2383,7 +2390,7 @@ def xr_save_dvv(components, filterid, mov_stack, dataframe): def xr_get_dvv(components, filterid, mov_stack): fn = os.path.join("DVV2", "%02i" % filterid, - "%03i_DAYS" % mov_stack, + "%s_%s" % (mov_stack[0], mov_stack[1]), "%s.nc" % components) if not os.path.isfile(fn): # logging.error("FILE DOES NOT EXIST: %s, skipping" % fn) @@ -2404,7 +2411,7 @@ def wavg(group, dttname, errname): """ d = group[dttname] - group[errname][group[errname] == 0] = 1e-6 + group.loc[group[errname] == 0,errname] = 1e-6 w = 1. 
/ group[errname] try: wavg = (d * w).sum() / w.sum() @@ -2434,7 +2441,7 @@ def wstd(group, dttname, errname): Note: This method uses the `np` module from NumPy. """ d = group[dttname] - group[errname][group[errname] == 0] = 1e-6 + group.loc[group[errname] == 0,errname] = 1e-6 w = 1. / group[errname] wavg = (d * w).sum() / w.sum() N = len(np.nonzero(w.values)[0]) diff --git a/msnoise/default.csv b/msnoise/default.csv index 3a0ed408..5890e747 100644 --- a/msnoise/default.csv +++ b/msnoise/default.csv @@ -10,7 +10,7 @@ enddate,2100-01-01,End Date to process: [2100-01-01]='No end',str,,, analysis_duration,86400,Duration of the Analysis in seconds (this shouldn't be changed for now),float,,, cc_sampling_rate,20,Sampling Rate for the CrossCorrelation (in Hz),float,,, cc_normalisation,NO,"Normalisation method for individual `corr_duration`) CCFs, default is not normalized (NO), but can be normalized based on the power of the two traces (POW), or by the maximum (MAX) or absolute maximum (ABSMAX) of the CCF",str,NO/POW/MAX/ABSMAX,, -resampling_method,Lanczos,Resampling method,str,Lanczos/Decimate,, +resampling_method,Decimate,Resampling method,str,Lanczos/Decimate,, preprocess_lowpass,8,Preprocessing Low-pass value (in Hz),float,,, preprocess_highpass,0.01,Preprocessing High-pass value (in Hz),float,,, preprocess_max_gap,10,Preprocessing maximum gap length that will be filled by interpolation (in seconds),float,,, @@ -35,11 +35,11 @@ cc_type,CC,Cross-Correlation type,str,,, components_to_compute_single_station,,"List (comma separated) of components within a single station. ZZ would be the autocorrelation of Z component, while ZE or ZN are the cross-components. 
Defaults to [], no single-station computations are done.",str,,, cc_type_single_station_AC,CC,Auto-Correlation type,str,,, cc_type_single_station_SC,CC,Cross-Correlation type for Cross-Components,str,,, -keep_all,N,Keep all cross-corr (length: ``corr_duration``),bool,Y/N,, +keep_all,Y,Keep all cross-corr (length: ``corr_duration``),bool,Y/N,, keep_days,Y,Keep all daily cross-corr,bool,Y/N,, ref_begin,1970-01-01,Beginning or REF stacks. Can be absolute (2012-01-01) or relative (-100) days,str,,, ref_end,2100-01-01,End or REF stacks. Same as ``ref_begin``,str,,, -mov_stack,5,"Number of days to stack for the Moving-window stacks ([5]= [day-4:day]), can be a comma-separated list 1,2,5,10",str,,, +mov_stack,"(('1d','1d'))","A list of two parameters: the time to ""roll"" over (default 1 day) and the granularity (step) of the resulting stacked CCFs (default 1 day) to stack for the Moving-window stacks. This can be a list of tuples, e.g. (('1d','1d'),('2d','1d')) corresponds to the MSNoise 1.6 ""1,2"" before. 
Time deltas can be anything pandas can interpret (""d"", ""min"", ""sec"", etc).",eval,,, export_format,MSEED,Export stacks in which format(s) ?,str,SAC/MSEED/BOTH,, sac_format,doublets,Format for SAC stacks ?,str,doublets/clarke,, dtt_lag,static,How is the lag window defined,str,dynamic/static,, diff --git a/msnoise/plots/ccftime.py b/msnoise/plots/ccftime.py index 090217bf..003c7372 100644 --- a/msnoise/plots/ccftime.py +++ b/msnoise/plots/ccftime.py @@ -45,12 +45,14 @@ from ..api import * -def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, seismic=False, +def main(sta1, sta2, filterid, components, mov_stackid=1, ampli=5, seismic=False, show=False, outfile=None, envelope=False, refilter=None, normalize=None, loglevel="INFO", **kwargs): logger = get_logger('msnoise.cc_plot_ccftime', loglevel, with_pid=True) db = connect() + params = get_params(db) + mov_stack = params.mov_stack[mov_stackid-1] maxlag = float(get_config(db, 'maxlag')) samples = get_maxlag_samples(db) cc_sampling_rate = float(get_config(db, 'cc_sampling_rate')) @@ -76,8 +78,8 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, seismic=False, pair = "%s:%s" % (sta1, sta2) - logger.info("Fetching CCF data for %s-%s-%i-%i" % (pair, components, filterid, - mov_stack)) + logger.info("Fetching CCF data for %s-%s-%i-%s" % (pair, components, filterid, + str(mov_stack))) try: stack_total = xr_get_ccf(sta1, sta2, components, filterid, mov_stack, taxis) except FileNotFoundError as fullpath: @@ -118,9 +120,9 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, seismic=False, plt.xlabel("Lag Time (s)") plt.axhline(0, lw=0.5, c='k') plt.grid() - title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %d' %\ + title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %i (%s_%s)' %\ (sta1, sta2, components, - filterid, low, high, mov_stack) + filterid, low, high, mov_stackid, mov_stack[0], mov_stack[1]) if refilter: title += ", Re-filtered (%.2f - %.2f Hz)" % (freqmin, 
freqmax) plt.title(title) @@ -138,10 +140,11 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, seismic=False, if outfile: if outfile.startswith("?"): pair = pair.replace(':', '-') - outfile = outfile.replace('?', '%s-%s-f%i-m%i' % (pair, + outfile = outfile.replace('?', '%s-%s-f%i-m%s_%s' % (pair, components, filterid, - mov_stack)) + mov_stack[0], + mov_stack[1])) outfile = "ccftime " + outfile logger.info("output to: %s" % outfile) plt.savefig(outfile) diff --git a/msnoise/plots/dvv.py b/msnoise/plots/dvv.py index b6ce9817..fd1f7195 100644 --- a/msnoise/plots/dvv.py +++ b/msnoise/plots/dvv.py @@ -20,7 +20,7 @@ from ..api import * -def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, +def main(mov_stackid=None, dttname="M", components='ZZ', filterid=1, pairs=[], showALL=False, show=False, outfile=None, loglevel="INFO"): logger = get_logger('msnoise.cc_dvv_plot_dvv', loglevel, with_pid=True) @@ -28,8 +28,10 @@ def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, params = get_params(db) start, end, datelist = build_movstack_datelist(db) - if mov_stack and mov_stack != 0: + if mov_stackid and mov_stackid != "": + mov_stack = params.mov_stack[mov_stackid - 1] mov_stacks = [mov_stack, ] + print(mov_stack) else: mov_stacks = params.mov_stack @@ -37,7 +39,7 @@ def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, components = components.split(",") else: components = [components, ] - + print(mov_stacks) low = high = 0.0 filter = get_filters(db, ref=filterid) low = float(filter.low) @@ -52,7 +54,7 @@ def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, plt.sca(axes[i]) except: plt.sca(axes) - plt.title('%i Days Moving Window' % mov_stack) + # plt.title('%s Moving Window' % mov_stack) for comps in components: try: dvv = xr_get_dvv(comps, filterid, mov_stack) @@ -69,9 +71,9 @@ def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=4, 
ncol=2, borderaxespad=0.) left, right = dvv.index[0], dvv.index[-1] - plt.title('1 Day') + plt.title("Stack %i (%s_%s)"% (mov_stackid or i+1, mov_stack[0], mov_stack[1])) else: - plt.title('%i Days Moving Window' % mov_stack) + plt.title("Stack %i (%s_%s)"% (i+1, mov_stack[0], mov_stack[1])) plt.xlim(left, right) plt.grid(True) @@ -84,9 +86,10 @@ def main(mov_stack=None, dttname="M", components='ZZ', filterid=1, if outfile: if outfile.startswith("?"): if len(mov_stacks) == 1: - outfile = outfile.replace('?', '%s-f%i-m%i-M%s' % (components, + outfile = outfile.replace('?', '%s-f%i-m%s_%s-M%s' % (components, filterid, - mov_stack, + mov_stack[0], + mov_stack[1], dttname)) else: outfile = outfile.replace('?', '%s-f%i-M%s' % (components, diff --git a/msnoise/plots/interferogram.py b/msnoise/plots/interferogram.py index 0cea61c7..d0943d94 100644 --- a/msnoise/plots/interferogram.py +++ b/msnoise/plots/interferogram.py @@ -26,7 +26,7 @@ from ..api import * -def main(sta1, sta2, filterid, components, mov_stack=1, show=True, +def main(sta1, sta2, filterid, components, mov_stackid=1, show=True, outfile=None, refilter=None, loglevel="INFO", **kwargs): logger = get_logger('msnoise.cc_plot_interferogram', loglevel, with_pid=True) @@ -51,8 +51,14 @@ def main(sta1, sta2, filterid, components, mov_stack=1, show=True, pair = "%s:%s" % (sta1, sta2) - logger.info("Fetching CCF data for %s-%s-%i-%i" % (pair, components, filterid, + params = get_params(db) + mov_stack = params.mov_stack[mov_stackid - 1] + # print(mov_stack) + + logger.info("Fetching CCF data for %s-%s-%i-%s" % (pair, components, filterid, mov_stack)) + + try: data = xr_get_ccf(sta1, sta2, components, filterid, mov_stack, taxis) except FileNotFoundError as fullpath: @@ -89,9 +95,9 @@ def main(sta1, sta2, filterid, components, mov_stack=1, show=True, else: plt.ylim(-maxlag, maxlag) - title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %d' % \ - (sta1.replace('_', '.'), sta2.replace('_', '.'), components, - filterid, 
low, high, mov_stack) + title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %i (%s_%s)' % \ + (sta1, sta2, components, + filterid, low, high, mov_stackid, mov_stack[0], mov_stack[1]) if refilter: title += ", Re-filtered (%.2f - %.2f Hz)" % (freqmin, freqmax) plt.title(title) @@ -101,10 +107,11 @@ def main(sta1, sta2, filterid, components, mov_stack=1, show=True, if outfile: if outfile.startswith("?"): pair = pair.replace(':', '-') - outfile = outfile.replace('?', '%s-%s-f%i-m%i' % (pair, + outfile = outfile.replace('?', '%s-%s-f%i-m%s_%s' % (pair, components, filterid, - mov_stack)) + mov_stack[0], + mov_stack[1])) outfile = "interferogram " + outfile logger.info("output to: %s" % outfile) plt.savefig(outfile) diff --git a/msnoise/plots/mwcs.py b/msnoise/plots/mwcs.py index e623863d..29d1a6b7 100644 --- a/msnoise/plots/mwcs.py +++ b/msnoise/plots/mwcs.py @@ -72,8 +72,9 @@ def plot_lags(minlag, maxlag): maxlag2 = minlag + dtt_width - - logger.info("Fetching CCF data for %s-%s-%i-%i" % (pair, components, filterid, + params = get_params(db) + mov_stack = params.mov_stack[mov_stack-1] + logger.info("Fetching CCF data for %s-%s-%i-%s" % (pair, components, filterid, mov_stack)) id = [] @@ -99,7 +100,7 @@ def plot_lags(minlag, maxlag): alldt = mwcs.M allcoh = mwcs.MCOH id = mwcs.index - print(mwcs.M) + # print(mwcs.M) alldt = alldt.resample('D').mean() allcoh = allcoh.resample('D').mean() @@ -161,7 +162,7 @@ def plot_lags(minlag, maxlag): plt.xlabel('Coherence') plt.ylabel("Lag Time (s)") - name = '%s-%s f%i m%i' % (sta1, sta2, filterid, mov_stack) + name = '%s-%s f%i m%s' % (sta1, sta2, filterid, mov_stack) name = name.replace('_', '.') plt.suptitle(name) @@ -169,10 +170,11 @@ def plot_lags(minlag, maxlag): if outfile: if outfile.startswith("?"): pair = pair.replace(':', '-') - outfile = outfile.replace('?', '%s-%s-f%i-m%i' % (pair, + outfile = outfile.replace('?', '%s-%s-f%i-m%s_%s' % (pair, components, filterid, - mov_stack)) + mov_stack[0], + mov_stack[1])) 
outfile = "mwcs " + outfile logger.info("output to: %s" % outfile) plt.savefig(outfile) diff --git a/msnoise/plots/spectime.py b/msnoise/plots/spectime.py index 987300fe..0395e2eb 100644 --- a/msnoise/plots/spectime.py +++ b/msnoise/plots/spectime.py @@ -45,10 +45,10 @@ from msnoise.api import build_movstack_datelist, connect, get_config, \ get_filters, get_results, check_stations_uniqueness, xr_get_ccf,\ - get_t_axis, get_logger + get_t_axis, get_logger, get_params -def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, show=False, +def main(sta1, sta2, filterid, components, mov_stackid=1, ampli=5, show=False, outfile=False, refilter=None, startdate=None, enddate=None, loglevel="INFO", **kwargs): logger = get_logger('msnoise.cc_plot_spectime', loglevel, @@ -75,8 +75,9 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, show=False, sta2 = check_stations_uniqueness(db, sta2) pair = "%s:%s" % (sta1, sta2) - - logger.info("Fetching CCF data for %s-%s-%i-%i" % (pair, components, filterid, + params = get_params(db) + mov_stack = params.mov_stack[mov_stackid-1] + logger.info("Fetching CCF data for %s-%s-%i-%s" % (pair, components, filterid, mov_stack)) stack_total = xr_get_ccf(sta1, sta2, components, filterid, mov_stack, taxis) @@ -116,9 +117,9 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, show=False, ax.set_xscale('log') ax.grid() - title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %d' %\ - (sta1.replace('_', '.'), sta2.replace('_', '.'), components, - filterid, low, high, mov_stack) + title = '%s : %s, %s, Filter %d (%.2f - %.2f Hz), Stack %i (%s_%s)' % \ + (sta1, sta2, components, + filterid, low, high, mov_stackid, mov_stack[0], mov_stack[1]) if refilter: title += ", Re-filtered (%.2f - %.2f Hz)" % (freqmin, freqmax) ax.set_title(title) @@ -127,10 +128,11 @@ def main(sta1, sta2, filterid, components, mov_stack=1, ampli=5, show=False, if outfile: if outfile.startswith("?"): pair = pair.replace(':', '-') - outfile = 
outfile.replace('?', '%s-%s-f%i-m%i' % (pair, + outfile = outfile.replace('?', '%s-%s-f%i-m%s_%s' % (pair, components, filterid, - mov_stack)) + mov_stack[0], + mov_stack[1])) outfile = "spectime " + outfile logger.info("output to: %s" % outfile) plt.savefig(outfile) diff --git a/msnoise/preprocessing.py b/msnoise/preprocessing.py index bdbbe07e..44790367 100644 --- a/msnoise/preprocessing.py +++ b/msnoise/preprocessing.py @@ -100,7 +100,7 @@ def preprocess(stations, comps, goal_day, params, responses=None, loglevel="INFO net, sta, loc = station.split('.') gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d') files = get_data_availability( - db, net=net, sta=sta, loc=loc, starttime=gd, endtime=gd) + db, net=net, sta=sta, loc=loc, starttime=gd, endtime=gd + datetime.timedelta(seconds=86401)) for comp in comps: datafiles[station][comp] = [] for file in files: diff --git a/msnoise/s03compute_no_rotation.py b/msnoise/s03compute_no_rotation.py index 4bfdd21f..0fd042d9 100644 --- a/msnoise/s03compute_no_rotation.py +++ b/msnoise/s03compute_no_rotation.py @@ -415,8 +415,11 @@ def main(loglevel="INFO"): # psds[iZ] = mm # define pairwise CCs + # TODO document that MASSIVE change !! + tmptime = tmp[0].stats.starttime.datetime thisdate = tmptime.strftime("%Y-%m-%d") + tmptime = tmp[0].stats.endtime.datetime thistime = tmptime.strftime("%Y-%m-%d %H:%M:%S") # Standard operator for CC diff --git a/msnoise/s04_stack2.py b/msnoise/s04_stack2.py index f7b41a7d..143bf568 100644 --- a/msnoise/s04_stack2.py +++ b/msnoise/s04_stack2.py @@ -103,7 +103,7 @@ from .api import * import logbook - +import matplotlib.pyplot as plt def main(stype, interval=1.0, loglevel="INFO"): """Computes the REF/MOV stacks. 
@@ -127,8 +127,8 @@ def main(stype, interval=1.0, loglevel="INFO"): taxis = get_t_axis(db) mov_stacks = params.mov_stack - if 1 in mov_stacks: - mov_stacks.remove(1) # remove 1 day stack, it will be done automatically + # if 1 in mov_stacks: + # mov_stacks.remove(1) # remove 1 day stack, it will be done automatically filters = get_filters(db, all=False) while is_dtt_next_job(db, flag='T', jobtype='STACK'): @@ -156,11 +156,10 @@ def main(stype, interval=1.0, loglevel="INFO"): for components in components_to_compute: logger.info('Processing %s-%s-%i' % (pair, components, filterid)) - - c = get_results(db, sta1, sta2, filterid, components, days, - mov_stack=1, format="xarray") - dr = xr_save_ccf(sta1, sta2, components, filterid, 1, taxis, c) - + c = get_results_all(db, sta1, sta2, filterid, components, days, format="xarray") + # print(c) + # dr = xr_save_ccf(sta1, sta2, components, filterid, 1, taxis, c) + dr = c if stype == "ref": start, end, datelist = build_ref_datelist(db) start = np.array(start, dtype=np.datetime64) @@ -171,14 +170,30 @@ def main(stype, interval=1.0, loglevel="INFO"): _ = _.mean(dim="times") xr_save_ref(sta1, sta2, components, filterid, taxis, _) continue - + dr = dr.resample(times="%is" % params.corr_duration).mean() for mov_stack in mov_stacks: - if mov_stack > len(dr.times): - logger.error("not enough data for mov_stack=%i" % mov_stack) - continue - # TODO avoid the 1D resampler here - xx = dr.resample(times='1D').mean().rolling( - times=mov_stack, min_periods=1).mean().dropna("times", how="all") + # if mov_stack > len(dr.times): + # logger.error("not enough data for mov_stack=%i" % mov_stack) + # continue + mov_rolling, mov_sample = mov_stack + # print(mov_rolling, mov_sample) + + if mov_rolling == mov_sample: + # just resample & mean + xx = dr.resample(times=mov_sample, label="right", skipna=True).mean().dropna("times", + how="all") + else: + mov_rolling = pd.to_timedelta(mov_rolling).total_seconds() + # print("Will roll over %i seconds" % 
mov_rolling) + duration_to_windows = mov_rolling / params.corr_duration + if not duration_to_windows.is_integer(): + print("Warning, rounding down the number of windows to roll over") + duration_to_windows = int(max(1, math.floor(duration_to_windows))) + # print("Which is %i windows of %i seconds duration" % (duration_to_windows, params.corr_duration)) + + xx = dr.rolling(times=duration_to_windows, min_periods=1).mean() + xx = xx.resample(times=mov_sample, label="right", skipna=True).asfreq().dropna("times", how="all") + xr_save_ccf(sta1, sta2, components, filterid, mov_stack, taxis, xx, overwrite=True) del xx if stype != "ref": diff --git a/msnoise/s05compute_mwcs2.py b/msnoise/s05compute_mwcs2.py index 7f627d01..4cc2c457 100644 --- a/msnoise/s05compute_mwcs2.py +++ b/msnoise/s05compute_mwcs2.py @@ -71,6 +71,7 @@ .. versionadded:: 1.4 Parallel Processing """ +import pandas as pd from .api import * from .move2obspy import mwcs @@ -191,10 +192,16 @@ def ww(a): except FileNotFoundError as fullpath: logger.error("FILE DOES NOT EXIST: %s, skipping" % fullpath) continue - logger.debug("Processing %s:%s f%i m%i %s" % (station1, station2, filterid, mov_stack, components)) - todo = data.index.intersection(pd.to_datetime(days)) - data = data.loc[todo] + logger.debug("Processing %s:%s f%i m%s %s" % (station1, station2, filterid, mov_stack, components)) + # todo = data.index.intersection() + # data = data.loc[todo] + + to_search = pd.to_datetime(days) + to_search = to_search.append(pd.DatetimeIndex([to_search[-1]+pd.Timedelta("1d"),])) + # data = data[(data.index.floor('d').isin(to_search) or data.index.ceil('d').isin(to_search))] + data = data[data.index.floor('d').isin(to_search)] data = data.dropna() + # print("Whitening %s" % fn) # data = pd.DataFrame(data) # data = data.apply(ww, axis=1, result_type="broadcast") @@ -323,7 +330,6 @@ def ww(a): del X del M, E, MCOH output = pd.concat(output, axis=1) - # output.to_csv(fn.replace('.nc','.csv')) xr_save_mwcs(station1, 
station2, components, filterid, mov_stack, taxis, output) diff --git a/msnoise/s06compute_dtt2.py b/msnoise/s06compute_dtt2.py index b9849c81..9f81a4fb 100644 --- a/msnoise/s06compute_dtt2.py +++ b/msnoise/s06compute_dtt2.py @@ -81,10 +81,16 @@ def main(interval=1, loglevel="INFO"): logger.error("FILE DOES NOT EXIST: %s, skipping" % fullpath) continue - todo = mwcs.index.intersection(pd.to_datetime(days)) - mwcs = mwcs.loc[todo] + # todo = mwcs.index.intersection(pd.to_datetime(days)) + # mwcs = mwcs.loc[todo] + # mwcs = mwcs.dropna() + + to_search = pd.to_datetime(days) + to_search = to_search.append(pd.DatetimeIndex([to_search[-1] + pd.Timedelta("1d"), ])) + mwcs = mwcs[mwcs.index.floor('d').isin(to_search)] mwcs = mwcs.dropna() + M = mwcs.xs("M", level=0, axis=1).copy() EM = mwcs.xs("EM", level=0, axis=1).copy() MCOH = mwcs.xs("MCOH", level=0, axis=1).copy() diff --git a/msnoise/s07_compute_dvv.py b/msnoise/s07_compute_dvv.py index cf41e6e5..480ef230 100644 --- a/msnoise/s07_compute_dvv.py +++ b/msnoise/s07_compute_dvv.py @@ -24,13 +24,13 @@ def main(interval=1, loglevel="INFO"): for mov_stack in mov_stacks: for components in params.all_components: - logger.debug("Processing f%i m%i %s" % (filterid, mov_stack, components)) + logger.debug("Processing f%i m%s %s" % (filterid, mov_stack, components)) try: dvv = compute_dvv(db, filterid, mov_stack, pairs=None, components=components, params=params) except ValueError: traceback.print_exc() - logger.error("No data for f%i m%i: %s" % (filterid, mov_stack, components)) + logger.error("No data for f%i m%s: %s" % (filterid, mov_stack, components)) continue xr_save_dvv(components, filterid, mov_stack, dvv) del dvv @@ -38,7 +38,7 @@ def main(interval=1, loglevel="INFO"): dvv = compute_dvv(db, filterid, mov_stack, pairs=None, components=None, params=params) except ValueError: - logger.error("No data for any component: f%i m%i" % (filterid, mov_stack)) + logger.error("No data for any component: f%i m%s" % (filterid, 
mov_stack)) continue xr_save_dvv("ALL", filterid, mov_stack, dvv) del dvv diff --git a/msnoise/stretch2.py b/msnoise/stretch2.py index a87d5251..2f0f0a6c 100644 --- a/msnoise/stretch2.py +++ b/msnoise/stretch2.py @@ -183,7 +183,7 @@ def main(loglevel="INFO"): refs, days = zip(*[[job.ref, job.day] for job in jobs]) logger.info( - "There are MWCS jobs for some days to recompute for %s" % pair) + "There are MWCS (stretching) jobs for some days to recompute for %s" % pair) for f in filters: filterid = int(f.ref) freqmin = f.mwcs_low @@ -239,17 +239,18 @@ def ww(a): allcoefs = [] allerrs = [] - fn = r"STACKS2/%02i/%03i_DAYS/%s/%s_%s.nc" % ( - filterid, mov_stack, components, station1, station2) + fn = r"STACKS2/%02i/%s_%s/%s/%s_%s.nc" % ( + filterid, mov_stack[0], mov_stack[1], components, station1, station2) print("Reading %s" % fn) if not os.path.isfile(fn): print("FILE DOES NOT EXIST: %s, skipping" % fn) continue data = xr_create_or_open(fn) data = data.CCF.to_dataframe().unstack().droplevel(0, axis=1) - valid = data.index.intersection(pd.to_datetime(days)) - data = data.loc[valid] + to_search = pd.to_datetime(days) + data = data[data.index.floor('d').isin(to_search)] data = data.dropna() + # print("Whitening %s" % fn) # data = pd.DataFrame(data) # data = data.apply(ww, axis=1, result_type="broadcast") @@ -300,8 +301,8 @@ def gauss_function(x, a, x0, sigma): np.array([alldeltas, allcoefs, allerrs]).T, index=alldays, columns=["Delta", "Coeff", "Error"], ) output = os.path.join('STR2', "%02i" % filterid, - "%03i_DAYS" % mov_stack, components) - print(df.head()) + "%s_%s" % (mov_stack[0],mov_stack[1]), components) + # print(df.head()) if not os.path.isdir(output): os.makedirs(output) fn = os.path.join(output, "%s.csv" % ref_name) diff --git a/msnoise/test/tests.py b/msnoise/test/tests.py index c20640a5..e5d595b0 100644 --- a/msnoise/test/tests.py +++ b/msnoise/test/tests.py @@ -320,7 +320,7 @@ def test_023_stack(self): update_config(db, 'ref_end', '2011-01-01') 
update_config(db, 'startdate', '2009-01-01') update_config(db, 'enddate', '2011-01-01') - update_config(db, 'mov_stack', '1,2,5') + update_config(db, 'mov_stack', "(('1d','1d'),('2d','1d'),('5d','1d'))") interval = 1. main('ref', interval) @@ -430,8 +430,8 @@ def test_100_plot_cctfime(self): for filter in get_filters(db): main(sta1, sta2, filter.ref, "ZZ", 1, show=False, outfile="?.png") - fn = 'ccftime %s-%s-f%i-m%i.png' % \ - ("%s-%s" % (sta1, sta2), "ZZ", filter.ref, 1) + fn = 'ccftime %s-%s-f%i-m%s_%s.png' % \ + ("%s-%s" % (sta1, sta2), "ZZ", filter.ref, "1d", "1d") self.assertTrue(os.path.isfile(fn), msg="%s doesn't exist" % fn) def test_100_plot_interferogram(self): @@ -446,10 +446,10 @@ def test_100_plot_interferogram(self): for filter in get_filters(db): main(sta1, sta2, filter.ref, "ZZ", 1, show=False, outfile="?.png") - fn = 'interferogram %s-%s-f%i-m%i.png' % \ + fn = 'interferogram %s-%s-f%i-m%s_%s.png' % \ ("%s-%s" % (sta1, sta2), - "ZZ", filter.ref, 1) + "ZZ", filter.ref, "1d", "1d") self.assertTrue(os.path.isfile(fn), msg="%s doesn't exist" % fn) def test_101_plot_spectime(self): @@ -464,10 +464,10 @@ def test_101_plot_spectime(self): for filter in get_filters(db): main(sta1, sta2, filter.ref, "ZZ", 1, show=False, outfile="?.png") - fn = 'spectime %s-%s-f%i-m%i.png' % \ + fn = 'spectime %s-%s-f%i-m%s_%s.png' % \ ("%s-%s" % (sta1, sta2), - "ZZ", filter.ref, 1) + "ZZ", filter.ref, "1d", "1d") self.assertTrue(os.path.isfile(fn), msg="%s doesn't exist" % fn) def test_102_plot_distance(self):