Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Running absolute mean normalisation within winzorizing function #226

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion msnoise/default.csv
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ response_prefilt,"(0.005, 0.006, 30.0, 35.0)","Remove instrument correction pre-
maxlag,120,Maximum lag (in seconds),float,,,
corr_duration,1800,Data windows to correlate (in seconds),float,,,
overlap,0,Amount of overlap between data windows [0:1[,float,,,
windsorizing,3,"Windsorizing at N time RMS , 0 disables windsorizing, -1 enables 1-bit normalization",float,,,
windsorizing,3,"Windsorizing at N time RMS , 0 disables windsorizing, -1 enables 1-bit normalization, -2 enables running absolute mean normalisation",float,,,
RAMN_window,10,"Integer number of samples on either side of the sample to consider. Total window size is 2N+1. [10]"",int,,,
RAMN_EQfilt,-1,"Bandpass filter to select for earthquakes for weighting. Format: 'min_freqency,max_freqency'. Unless in this format, no filtering will happen [-1]",str,,,
whitening,A,"Whiten Traces before cross-correlation: [A]ll (except for autocorr), [N]one, or only if [C]omponents are different",str,A/N/C,,
whitening_type,B,"Type of spectral whitening function to use: [B]rutal (amplitude to 1.0), divide spectrum by its [PSD] or by band-passing the white spectrum with a hanning-window [HANN]. WARNING: only works for compute_cc, not compute_cc_rot, where it will always be B",str,B/PSD/HANN,,
clip_after_whiten,N,"Do the clipping (winsorizing) after whitening?",bool,Y/N,,
Expand Down
62 changes: 56 additions & 6 deletions msnoise/s03compute_no_rotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,32 +210,82 @@


import logbook

logger = logbook.Logger(__name__)

def winsorizing(data, params, input="timeseries", nfft=0):
input1D = False
#### Version ERHM wrote to allow for running absolute mean normalisation (RAMN)
## new params file values:
## "windsorizing" = -2 for RAMN
## "RAMN_window" integer number of samples on either side of the sample to consider (the number of samples in the window = 2N+1). Also coded: number of seconds either side of sample.
## "RAMN_EQfilt" range of frequencies to select for earthquakes in the format "min_freqency,max_freqency". Unless in this format, no filtering will happen (default) - choose e.g. "0.01,10" input1D = False
if len(data.shape) == 1:
data = data.reshape(-1, data.shape[0])
input1D = True
if input == "fft":
# data = np.real(sf.ifft(data, n=nfft, axis=1))
data = sf.ifftn(data, [nfft, ], axes=[1, ]).astype(np.float64)

# data = np.real(sf.ifft(data, n=nfft, axis=1))
data = sf.ifftn(data, [nfft, ], axes=[1, ]).astype(np.float64)

count = 1
for i in range(data.shape[0]):
if params.windsorizing == -1:
if count == 1:
logger.debug("ERHM windsorizing: 1-bit normalisation")
np.sign(data[i], data[i]) # inplace
elif params.windsorizing != 0:

elif params.windsorizing == -2:
## make working data set. The dataset can be filtered to select earthquake frequencies as suggested in Bensen et al. 2007.
## perhaps we need to produce an error message if RAMN_EQfilt isn't it the correct format
## do we want the capability to lowpass or highpass filter as well?
tmp = data[i].copy()
if params.RAMN_EQfilt.count(',') == 1 and params.RAMN_EQfilt.split(',')[0] < params.RAMN_EQfilt.split(',')[1]:
minfreq = float(params.RAMN_EQfilt.split(',')[0])
maxfreq = float(params.RAMN_EQfilt.split(',')[1])
if count ==1:
logger.debug("ERHM windsorizing: Waveforms for RAMN weighting filtered between %d and %d" % (minfreq, maxfreq))
tmp = bandpass(tmp, freqmin=minfreq,freqmax=maxfreq, df=params.cc_sampling_rate) ## assumes data already converted to cc_sampling_rate.
#Can get an obspy error: Selected high corner frequency (maxfreq) of bandpass is at or above Nyquist (params.cc_sampling_rate*0.5). Applying a high-pass instead.

else:
if count == 1:
logger.debug("ERHM windsorizing: Waveforms for RAMN weighting unfiltered")
tmp = tmp


window_size = params.RAMN_window # for RAMN_window entered in number of samples
#window_size = params.RAMN_window * params.cc_sampling_rate # for RAMN_window entered in seconds

for j in range(len(data[i])):
if j < window_size: ## loop over working data and pull out those between 0 and n+N
window = tmp[0:j+window_size+1]
elif j >= tmp.shape[0] - window_size: ## loop over working data and pull out those between n-N and max(n)
window = tmp[j-window_size:tmp.shape[0]]
else: ## loop over working data and pull out those between n-N and n+N
window = tmp[j-window_size:j+window_size+1]

## we need to deal with the edge cases in a sensible way: do we divide by 2N+1 for all windows, or by len(window)
## Dividing by len(window) (taking the mean), results in larger weight_j, and more downweighted edges
weight_j = np.mean(np.abs(window)) ## dividing by len(window)
#weight_j = np.sum(np.abs(window))/(2*window_size+1) ## dividing by 2N+1
data[i][j] = data[i][j]/weight_j

elif params.windsorizing != 0:
if count == 1:
logger.debug("ERHM windsorizing: RMS normalisation")
# imin, imax = scoreatpercentile(data[i], [0.1, 99.9])
# not_outliers = np.where((data[i] >= imin) &
# (data[i] <= imax))[0]
# rms = data[i][not_outliers].std() * params.windsorizing
rms = data[i].std() * params.windsorizing
np.clip(data[i], -rms, rms, data[i]) # inplace


count += 1

if input == "fft":
data = sf.fftn(data, [nfft, ], axes=[1, ])
if input1D:
data = data[0]

return data


Expand Down