-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathP02_plot_thermal_mq.py
406 lines (340 loc) · 18.4 KB
/
P02_plot_thermal_mq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
"""
Plots the thermal moonquakes detected by the Civilini et al. 2021 catalog and saves their data as PKL files for easier
access during the finetuning step.
"""
# Import packages
import pandas as pd
import numpy as np
import os
import glob
from obspy.core import read
from scipy import signal
from obspy.signal.invsim import cosine_taper
from obspy.signal.filter import highpass
from matplotlib import pyplot as plt
import datetime
from matplotlib import cm
from joblib import Parallel, delayed
import pickle
# import warnings
# import matplotlib.cbook
# warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)
# Setup functions
def find_nearest(array, value):
"""
Find nearest index corresponding to a value in an array
:param array: [np array] Array that we are searching
:param value: [float] Value that we're searching for
:return:
"""
array = np.asarray(array)
idx = (np.abs(array - value)).argmin()
return idx
def run_highpass_filter(input_data, input_corner_freq):
"""
Passes a highpass filter on the data
:param input_data: [obspy trace] Seismic input trace in Obspy format
:param input_corner_freq: [int] Corner frequency of highpass filter
:return:
"""
# Remove the mean and run a cosine taper
trace_nomean = input_data.data - np.mean(input_data.data)
N = len(trace_nomean)
taper_function = cosine_taper(N, p=0.1)
trace_taper = trace_nomean * taper_function
# Highpass filterbound in Hertz
data_filt = highpass(trace_taper, input_corner_freq, input_data.stats.sampling_rate, corners=4, zerophase=False)
tr_filt = input_data
tr_filt.data = data_filt
return tr_filt
def plot_individual_mq(input_file, evid, pre_event_window_size=180, post_event_window_size=360):
"""
Plots the time series and spectrogram of a single moonquake file
:param input_file: [str] Filepath of the hourly data trace
:param evid: [str] The event ID
:param pre_event_window_size: [int] Size of the window in seconds before the moonquake arrival
:param post_event_window_size: [int] Size of the window in seconds after the moonquake arrival
:return:
"""
# Find the header information for this station earthquake in the mq catalog
file_bn = os.path.basename(input_file)
sta = f"g{file_bn.split('_')[1][3:]}"
evid_header = cat.loc[(cat['evid'] == evid) & (cat['station'] == sta)].iloc[0]
# Create variables based on the header
sta = evid_header['station']
ucase_sta = f'G{sta[1:]}'
abs_time = evid_header['abs_time']
year = abs_time.split('-')[0]
month = abs_time.split('-')[1]
day = abs_time.split('-')[2].split('T')[0]
hour = abs_time.split('T')[1].split(':')[0]
minute = abs_time.split('T')[1].split(':')[1]
second = abs_time.split('T')[1].split(':')[2]
# Get the UTC time from the sacfile (it takes too much space to output to txt in P3_a)
st = read(input_file)
tr = st[0]
utc_time = tr.times("utcdatetime")
# Find the relative time to the start of the record for the moonquake arrival
dt_event = datetime.datetime(int(year), int(month), int(day), int(hour), int(minute), int(second))
dt_trace_start = datetime.datetime(int(year), int(month), int(day), int(hour))
diff = dt_event - dt_trace_start
rel_det = diff.seconds
f_original, t_original, Sxx_original = signal.spectrogram(tr.data, tr.stats.sampling_rate)
# Find the maximum value for the segment of data we're looking at
Sxx_window_start_ind = find_nearest(t_original, rel_det - Sxx_background_threshold_window)
Sxx_window_end_ind = find_nearest(t_original, rel_det)
Sxx_subset = Sxx_original[:, Sxx_window_start_ind:Sxx_window_end_ind]
Sxx_background_val = np.max(Sxx_subset)
if Sxx_background_val < Sxx_background_threshold:
mia_data = 1
else:
mia_data = 0
# Use below if you need to run a highpass filter
# trace_filtered = tr
trace_filtered = run_highpass_filter(tr, 10.0)
f, t, Sxx = signal.spectrogram(trace_filtered.data, trace_filtered.stats.sampling_rate)
# Have to change the evid header for the absolute time for Windows machines. Windows doesn't support :
evid_header['abs_time'] = f"{evid_header['abs_time'].split('T')[0]}T" \
f"{evid_header['abs_time'].split('T')[1].split(':')[0]}.{evid_header['abs_time'].split('T')[1].split(':')[1]}." \
f"{evid_header['abs_time'].split('T')[1].split(':')[2]}"
# Name the title and figure
plt_title = f"{evid_header['station']}, {evid_header['evid']}, {evid_header['abs_time']}, " \
f"Grade {evid_header['grade']}, SNR = {evid_header['snr']} ({evid_header['practical_snr']})"
plt_filename = f"{evid_header['station']}_{evid_header['evid']}_{evid_header['abs_time']}_G{evid_header['grade']}"
# # Plot the figure
tme = (tr.times("matplotlib") - tr.times("matplotlib")[0]) * 86400
if plt_individual == 'yes':
fig = plt.figure(figsize=(10, 10), num=1, clear=True)
ax = fig.add_subplot(2, 1, 1)
ax.plot(tme, tr.data, "b-")
ax.set_ylabel('Velocity (volts)', fontweight='bold')
ax.set_xlabel('Time (s)', fontweight='bold')
ax.set_xlim((rel_det - pre_event_window_size, rel_det + post_event_window_size))
ax.axvline(x=rel_det, c='r')
ax.axvspan(rel_det, rel_det + evid_header['envelope_size'], alpha=0.5, color='gray')
ax.set_title(plt_title, fontweight='bold')
ax2 = fig.add_subplot(2, 1, 2)
specmax = 1e-6
specax = ax2.pcolormesh(t, f, Sxx, cmap=cm.jet, vmax=specmax, shading='auto')
ax2.set_ylabel('Frequency (Hz)', fontweight='bold')
ax2.set_xlabel('Time (sec)', fontweight='bold')
ax2.set_xlim((rel_det - pre_event_window_size, rel_det + post_event_window_size))
ax2.axvline(x=rel_det, c='r')
fig.tight_layout()
fig.subplots_adjust(bottom=0.2, top=0.9)
cbar_ax = fig.add_axes([0.30, 0.08, 0.4, 0.03])
cbar = fig.colorbar(specax, cax=cbar_ax, orientation='horizontal')
cbar.set_label('Spectral Energy', fontweight='bold')
fig.savefig(f"{out_image_directory}{evid_header['station']}/{plt_filename}.png")
# plt.savefig(f"{out_image_directory}{evid_header['station']}/EPS_{plt_filename}.eps")
return tme, tr.data, t, f, Sxx, rel_det, mia_data
def plot_combined(time_array, data_array, spec_t_array, spec_f_array, rel_det_vector,
Sxx_array, evid, data_geophone_list, pre_event_window_size, post_event_window_size):
"""
:param time_array: [np_array] 2D array of time series time
:param data_array: [np_array] 2D array of time series data
:param spec_t_array: [np_array] 2D array of spectrogram time
:param spec_f_array: [np_array] 2D array of spectrogram frequency
:param rel_det_vector: [list] Vector of relative detections
:param Sxx_array: [np_array] 3D array of sepctrogram values, with depth correpsonding to geophone
:param data_geophone_list: [vector] Describes which index corresponds to which geophone
:param evid: [str] event ID
:param window_size: [int] Size of the window in seconds around the moonquake
:return:
"""
# We will plot by iterating across each geophone
# This way our codes will be flexible when it comes to events with grade lower than A (i.e. don't have 4 geophones)
num_geophones = np.shape(time_array)[1]
# Find a good ylimit for plotting the traces
# Cut the traces for what we are plotting below
max_trace_ylim = []
for geophone_ind in np.arange(num_geophones):
xlim_min_ind = find_nearest(time_array[:, geophone_ind], rel_det_vector[geophone_ind] - pre_event_window_size)
xlim_max_ind = find_nearest(time_array[:, geophone_ind], rel_det_vector[geophone_ind] + post_event_window_size)
segmented_trace = data_array[xlim_min_ind:xlim_max_ind, geophone_ind]
max_trace_ylim.append(np.max([np.max(segmented_trace), abs(np.min(segmented_trace))]))
trace_plot_ymax = np.max(max_trace_ylim)
# Start the iteration and plot
fig, axs = plt.subplots(2, num_geophones, figsize=(16, 7), num=2, clear=True)
for geophone_ind in np.arange(num_geophones):
# Plot the time series
axs[0, geophone_ind].plot(time_array[:, geophone_ind], data_array[:, geophone_ind], "b-")
axs[0, geophone_ind].set_ylabel('Amplitude (counts)', fontweight='bold')
axs[0, geophone_ind].set_xlabel('Time (s)', fontweight='bold')
axs[0, geophone_ind].set_xlim((rel_det_vector[geophone_ind] - pre_event_window_size,
rel_det_vector[geophone_ind] + post_event_window_size))
axs[0, geophone_ind].set_ylim((-1*trace_plot_ymax, trace_plot_ymax))
axs[0, geophone_ind].axvline(x=rel_det_vector[geophone_ind], c='r')
axs[0, geophone_ind].set_title(f'{data_geophone_list[geophone_ind]} (t={int(rel_det_vector[geophone_ind])} sec)')
# Plot the spectrogram
specmax = 1e-6
specax = axs[1, geophone_ind].pcolormesh(spec_t_array[:, geophone_ind], spec_f_array[:, geophone_ind],
Sxx_array[:, :, geophone_ind], cmap=cm.jet, vmax=specmax,
shading='auto')
# specax = axs[1, geophone_ind].pcolormesh(spec_t_array[:, geophone_ind], spec_f_array[:, geophone_ind],
# Sxx_array[:, :, geophone_ind], cmap=cm.jet)
axs[1, geophone_ind].set_ylabel('Frequency (Hz)', fontweight='bold')
axs[1, geophone_ind].set_xlabel('Time (sec)', fontweight='bold')
axs[1, geophone_ind].set_xlim((rel_det_vector[geophone_ind] - pre_event_window_size,
rel_det_vector[geophone_ind] + post_event_window_size))
axs[1, geophone_ind].axvline(x=rel_det_vector[geophone_ind], c='r')
# Do tight_layout BEFORE adjusting the subplots to add the colorbar
fig.tight_layout()
fig.subplots_adjust(bottom=0.2, top=0.9)
fig.suptitle(f'{evid}', fontweight='bold')
cbar_ax = fig.add_axes([0.30, 0.08, 0.4, 0.03])
cbar = fig.colorbar(specax, cax=cbar_ax, orientation='horizontal')
cbar.set_label('Spectral Energy', fontweight='bold')
fig.savefig(f"{out_image_directory}combined/GradeA_evid_{evid}.png")
# fig.savefig(f"{out_image_directory}combined/EPS_GradeA_evid_{evid}.eps")
# plt.close('all')
return
def plot_mq_wrapper(input_info, pre_event_window_size=180, post_event_window_size=360):
"""
Wrapper code for plotting the thermal moonquakes
:param input_info: [series] Displays the catalog information for the moonquake
:param pre_event_window_size: [float] Time in seconds before the arrival to take
:param post_event_window_size: [float] Time in seconds after the arrival to take
:return:
"""
# Get information about the moonquake from the header
# Note, get only the general date/hour for this step, as the precise time will vary for the instruments.
abs_time = input_info['abs_time']
year = abs_time.split('-')[0]
month = abs_time.split('-')[1]
day = abs_time.split('-')[2].split('T')[0]
hour = abs_time.split('T')[1].split(':')[0]
evid = input_info['evid']
# If the event has already been done, skip
if os.path.exists(f'{out_image_directory}combined_files/evid_{evid}.pkl'):
print(f'Event {evid} has already been processed...')
return
# Find the piece of data that matches this event (data is hourly)
data_files = sorted(glob.glob(f'{sacdata}{year}{month}{day}/{year}{month}{day}_17Geo*_{hour}_ID'))
# Create a data geophone list so we can know which index corresponds to which geophone
# Note, this is fine for Grade A events where it goes Geo1, 2, 3, 4, but it's good to keep track
# for later down the line if we choose events with fewer number of geophones.
num_geophones = len(data_files)
data_geophone_list = []
for geophone_ind in np.arange(num_geophones):
data_file_bn = os.path.basename(data_files[geophone_ind])
data_geophone_list.append(f"g{data_file_bn.split('_')[1].split('17')[1][1:]}")
# Plot individual figures of the events
# Save each data vector for the plotting so we can easily combine without reopening the data
# We have to wait to intialize the arrays after getting the shape from the first piece of data
geophone_ind = 0
mia_vector = []
for data_file in data_files:
time, data, spec_t, spec_f, spec_Sxx, rel_det, mia_val = plot_individual_mq(data_file, input_info['evid'])
mia_vector.append(mia_val)
# Note: time_array, data_array, spec_f_array, spec_t_array will be 2D ARRAYS
# rel_det_vector will be a VECTOR
# Sxx_array is a 3D array with depth being the iteration of the geophone
if geophone_ind == 0:
time_array = np.zeros((len(time), len(data_files)))
data_array = np.zeros((len(data), len(data_files)))
spec_t_array = np.zeros((len(spec_t), len(data_files)))
spec_f_array = np.zeros((len(spec_f), len(data_files)))
rel_det_vector = np.zeros((len(data_files)))
Sxx_array = np.zeros((np.shape(spec_Sxx)[0], np.shape(spec_Sxx)[1], len(data_files)))
# Now place each vector or array into their representative array (or 3D array in the case of Sxx)
time_array[:, geophone_ind] = time
data_array[:, geophone_ind] = data
spec_t_array[:, geophone_ind] = spec_t
spec_f_array[:, geophone_ind] = spec_f
rel_det_vector[geophone_ind] = rel_det
Sxx_array[:, :, geophone_ind] = spec_Sxx
# Increase the geophone index
geophone_ind = geophone_ind + 1
# If the background value is very low (i.e. missing data), do not continue the processing
mia_sum = np.sum(mia_vector)
if mia_sum > 0:
print(f'{evid} has missing data! Skipping...')
return
# Save the cut time series into a different location
# The time-window will need to be a bit larger to make sure that we are getting the full extent of the trace
# Use geophone 1 as the reference geophone (but it shouldnt matter)
ref_geophone_ind = 0
first_arrival = np.min(rel_det_vector)
last_arrival = np.max(rel_det_vector)
xcut_min_ind = find_nearest(time_array[:, ref_geophone_ind], first_arrival - pre_event_window_size)
xcut_max_ind = find_nearest(time_array[:, ref_geophone_ind], last_arrival + post_event_window_size)
# Plot a figure showing all of the traces together
plot_combined(time_array, data_array, spec_t_array, spec_f_array, rel_det_vector, Sxx_array,
evid, data_geophone_list, pre_event_window_size, post_event_window_size)
# As the time array is displayed in relative time, we need to know the start and end times of the trace
xcut_min_ind_value = time_array[xcut_min_ind, ref_geophone_ind]
xcut_max_ind_value = time_array[xcut_max_ind, ref_geophone_ind]
dt_start = datetime.datetime(int(year), int(month), int(day), int(hour))
abs_trace_start = dt_start + datetime.timedelta(seconds=xcut_min_ind_value)
abs_trace_end = dt_start + datetime.timedelta(seconds=xcut_max_ind_value)
# Cut the data
time_array_cut = time_array[xcut_min_ind:xcut_max_ind, :]
data_array_cut = data_array[xcut_min_ind:xcut_max_ind, :]
# Save this information along with the header of the hour file in the pickle
with open(f'{out_image_directory}combined_files/evid_{evid}.pkl', 'wb') as f:
pickle.dump([time_array_cut, data_array_cut, abs_trace_start, abs_trace_end, rel_det_vector,
input_info, data_geophone_list], f)
print(f"Finished saving figures and data for {evid}...")
return
def find_largest_events(input_cat, num_events):
"""
Returns the indices of the largest snr events within a catalog file
:param input_cat: [pd df] Catalog file
:param num_events: [int] The number of events we want
:return:
"""
snr_vector = input_cat['practical_snr'].values
maxsnr_evids = []
for evt_index in np.arange(num_events):
maxval_index = np.where(snr_vector == np.max(snr_vector))[0][0]
# Find the evt corresponding to this index
event_desig = input_cat['evid'].values[maxval_index]
maxsnr_evids.append(event_desig)
snr_vector[maxval_index] = -9999
# For all of these evids, remove the ones whose maximum amplitude value is not
# Now go back to the original catalog and find the index corresponding to the evid
maxsnr_idx = []
for maxsnr_evid in maxsnr_evids:
maxsnr_idx.append(np.where(input_cat['evid'].values == maxsnr_evid)[0][0])
return maxsnr_idx
# Main
# Running directory is where the codes are located
rundir = 'C:/Users/fcivi/Dropbox/NASA_codes/thermal_loc_final4/'
data_dir = 'C:/data/lunar_data/'
sacdata = f'{data_dir}LSPE_sac_hourly/'
output_dir = 'C:/data/lunar_output/'
if not os.path.exists(output_dir):
os.mkdir(output_dir)
# Choose whether or not to plot individual plots for each instrument or just a single aggregate plot
plt_individual = 'yes'
# Set number of CPU cores
num_cores = 5
catalog_file = f'{rundir}fc_lspe_dl_catalog.csv'
out_image_directory = f'{output_dir}original_images/'
if not os.path.exists(out_image_directory):
os.mkdir(out_image_directory)
# Setup the maximum amplitude threshold. If all 4 geophones are less than this value, do not process
Sxx_background_threshold = 1e-10
Sxx_background_threshold_window = 50
# Setup the output image directories for the geophone
if not os.path.exists(f'{out_image_directory}combined/'):
os.mkdir(f'{out_image_directory}combined/')
if not os.path.exists(f'{out_image_directory}combined_files/'):
os.mkdir(f'{out_image_directory}combined_files/')
geophones = ['geo1', 'geo2', 'geo3', 'geo4']
for geophone in geophones:
if not os.path.exists(f'{out_image_directory}{geophone}'):
os.mkdir(f'{out_image_directory}{geophone}')
# Select Grade A events from the catalog (found on all 4 geophones). Select B, C, D, for other grades
cat = pd.read_csv(catalog_file)
grade_a = cat.loc[(cat['grade'] == 'A') & (cat['station'] == 'geo1')]
grade_a = grade_a.reset_index()
subset_cat = grade_a
# For each event, plot a figure showing the time series and spectrogram of the event
if num_cores == 1:
for image_ind in np.arange(len(subset_cat)):
plot_mq_wrapper(subset_cat.iloc[image_ind])
else:
Parallel(n_jobs=num_cores)(delayed(plot_mq_wrapper)(subset_cat.iloc[image_ind])
for image_ind in np.arange(len(subset_cat)))