diff --git a/docs/pysep.rst b/docs/pysep.rst index 757b47a..c6de343 100644 --- a/docs/pysep.rst +++ b/docs/pysep.rst @@ -153,62 +153,93 @@ class. See the PySEP docstring for input parameter types and definitions. ------------------------------------------------------------------------------- -Multiple Event Input --------------------- +PySEP Outputs +-------------- -To use the same configuration file with multiple events, you can use an event -file passed to PySEP through the command line. +PySEP uses a default directory structure when saving files: -When using this option, the event parameters inside the config file will be -ignored, but all the other parameters will be used to gather data and metadata. +* ``output_dir``: By default, PySEP writes all files to the User-defined + parameter ``output_dir``, which defaults to the current working directory. +* ``event_tag``: Files are written into a sub-directory defined by the event + origin time, and Flinn-Engdahl region. For example: + ``2009-04-07T201255_SOUTHERN_ALASKA`` +* ``sac_subdir``: All waveform files are saved in a further sub-directory + (default: `SAC/`), to avoid cluttering up the output directory. Waveform + files are tagged by the `event_tag` and trace ID. -Event input files should be text files where each row describes one event with -the following parameters as columns: +Users can use the parameters ``write_files`` and ``plot_files`` to control +exactly what files are produced during a PySEP (see `API documentation +`__ for details). -.. ORIGIN_TIME LONGITUDE LATITUDE DEPTH[KM] MAGNITUDE +By default, PySEP will write SAC files, StationXML, QuakeML and config files, +and create a source-receiver map and record section. -For an example event input file called 'event_input.txt', call structure is: +Override Directory Names +```````````````````````` -.. code:: bash +In some cases it may be useful for Users to save files directly to their +working directory, without all the automatically generated sub-directories. - pysep -c pysep_config.yaml -E event_input.txt +* To ignore the automatically generated event tag, you can set the + ``overwrite_event_tag`` parameter as an empty string. Via the command line: -.. note:: + .. code:: bash - Multiple event input is only available for command line usage of PySEP. - We suggest using a for loop if you would like to script multiple event - input using PySEP + pysep -c pysep_config.yaml --overwrite_event_tag '' + or via scripting: -Legacy Filenaming Schema ------------------------- + .. code:: python -The new version of PySEP uses a file naming schema that is incompatible with -previous versions, which may lead to problems in established workflows. + sep = Pysep(overwrite_event_tag="") -To honor the legacy naming schema of PySEP, simply use the `--legacy_naming` -argument in the command line. This will change how the event tag is formatted, -how the output directory is structured, and how the output SAC files are named. +* To set your own event tag, use a string value -.. code:: bash + .. code:: bash - pysep -c pysep_config.yaml --legacy_naming + pysep -c pysep_config.yaml --overwrite_event_tag event_abc -Or with scripting +* To ignore the SAC subdirectory and save waveform files directly in the + output directory, use the ``sac_subdir`` parameter, which should be input in + your YAML parameter file: -.. code:: python + .. code:: yaml - sep = Pysep(legacy_naming=True, ...) + sac_subdir: '' + + or via scripting + + .. code:: python + + sep = Pysep(sac_subdir="") # or use a string value to define your own + +* `Example`: if a User only wants to save SAC waveforms for the rotated RTZ + component within their current working directory, ignoring all automatically + generated sub directories, all other written files and all plots: + + .. code:: python + + sep = Pysep(overwrite_event_tag="", sac_subdir="", write_files="sac_rtz", + plot_files="") -Output Filename Control -``````````````````````` +Override Filenames +`````````````````` + +.. note:: + + The output SAC file names are hardcoded as trace IDs (with or without the + event tag). If additional control over file IDs is a required feature, + please open up a `GitHub issue `__ The event tag used to name the output directory and written SAC files can be set -manually by the user using the `--event_tag` argument. If not given, the tag -will default to a string consisting of event origin time and Flinn-Engdahl -region (or just origin time if `--legacy_naming` is used). Other output files -such as the config file and ObsPy objects can be set as in the following: +manually by the user using the ``overwrite_event_tag`` argument. + +Other output file names can also be changed from their default values, see the +:meth:`write function ` for write file options and +arguments to use for changing file names. + +An example of this via the command line: .. code:: bash @@ -228,15 +259,57 @@ Or with scripting config_fid="event_abc.yaml", ...) +Legacy Filenaming Schema +```````````````````````` + +The new version of PySEP uses a file naming schema that is incompatible with +previous versions, which may lead to problems in established workflows. + +To honor the legacy naming schema of PySEP, simply use the ``legacy_naming`` +parameter. This will change how the event tag is formatted, how the output +directory is structured, and how the output SAC files are named. + +.. code:: bash + + pysep -c pysep_config.yaml --legacy_naming + +Or with scripting + +.. code:: python + + sep = Pysep(legacy_naming=True, ...) + + +Multiple Event Input +-------------------- + +To use the same configuration file with multiple events, you can use an event +file passed to PySEP through the command line. + .. note:: - The output SAC file names are hardcoded and cannot be changed - by the user. If this is a required feature, please open up a GitHub issue, - and the developers will address this need. + Multiple event input is only available for command line usage of PySEP. + We suggest using a for loop if you would like to script multiple event + input using PySEP + + +When using this option, the event parameters inside the config file will be +ignored, but all the other parameters will be used to gather data and metadata. + +Event input files should be text files where each row describes one event with +the following parameters as columns: + +.. ORIGIN_TIME LONGITUDE LATITUDE DEPTH[KM] MAGNITUDE + +For an example event input file called 'event_input.txt', call structure is: + +.. code:: bash + + pysep -c pysep_config.yaml -E event_input.txt ObsPy Mass Downloader -````````````````````` +--------------------- `ObsPy's Mass Download `__ diff --git a/pysep/pysep.py b/pysep/pysep.py index 534f783..521bb36 100644 --- a/pysep/pysep.py +++ b/pysep/pysep.py @@ -7,6 +7,7 @@ adjoint tomography codes. """ import argparse +import logging import os import shutil import sys @@ -39,7 +40,7 @@ remove_traces_w_masked_data ) from pysep.utils.fmt import format_event_tag, format_event_tag_legacy, get_codes -from pysep.utils.io import read_yaml, read_event_file, write_pysep_stations_file +from pysep.utils.io import read_yaml, read_event_file, write_pysep_station_file from pysep.utils.llnl import scale_llnl_waveform_amplitudes from pysep.utils.process import (merge_gapped_data, trim_start_end_times, resample_data, format_streams_for_rotation, @@ -53,12 +54,13 @@ class Pysep: def __init__(self, config_file=None, event_selection="default", client="IRIS", origin_time=None, reference_time=None, networks="*", stations="*", locations="*", channels="*", - station_ids=None, + station_ids=None, use_mass_download=False, + extra_download_pct=0.005, event_latitude=None, event_longitude=None, event_depth_km=None, event_magnitude=None, remove_response=True, remove_clipped=True, remove_insufficient_length=True, remove_masked_data=True, - water_level=60, detrend=True, demean=True, taper_percentage=0, + water_level=60, detrend=True, demean=True, taper_percentage=0., rotate=None, pre_filt="default", fill_data_gaps=False, gap_fraction=1., mindistance_km=0, maxdistance_km=20E3, minazimuth=0, @@ -68,10 +70,11 @@ def __init__(self, config_file=None, event_selection="default", seconds_after_event=20, seconds_before_ref=100, seconds_after_ref=300, taup_model="ak135", output_unit="VEL", user=None, password=None, client_debug=False, - log_level="DEBUG", timeout=600, write_files="all", + log_level="DEBUG", timeout=600, + write_files="inv,event,stream,sac,config_file,station_list", plot_files="all", llnl_db_path=None, output_dir=None, - overwrite=False, legacy_naming=False, overwrite_event_tag=None, - use_mass_download=False, extra_download_pct=0.005, + overwrite=False, legacy_naming=False, + overwrite_event_tag=None, **kwargs): """ .. note:: @@ -142,6 +145,19 @@ def __init__(self, config_file=None, event_selection="default", :param event_longitude: longitude of the event in units of degrees. used for defining the event hypocenter and for removing stations based on distance from the event. + :type event_depth_km: float or NoneType + :param event_depth_km: depth of event in units of kilometers. postive + values for deeper depths. Used for: + + 1) `event_selection`=='search' + 2) estimating phase arrivals with TauP + 3) plotting events and title on source receiver maps + + If set to None, (2) and (3) will fail. Best-guesses are acceptable. + :type event_magnitude: float or NoneType + :param event_magnitude: event magnitude in Mw used for + `event_selection`=='search' and source receiver map plotting. If + provided as None, map plotting will fail. :type seconds_before_event: float :param seconds_before_event: For event selection only, only used if `event_selection`=='search'. Time [s] before given `origin_time` to @@ -275,8 +291,8 @@ def __init__(self, config_file=None, event_selection="default", Data processing parameters :type detrend: bool - :param detrend: apply simple linear detrend to data during prior to - removing instrument response (if `remove_response`==True) + :param detrend: apply simple linear detrend as the first preprocessing + step :type demean: bool :param demean: apply demeaning to data during instrument reseponse removal. Only applied if `remove_response` == True. @@ -295,7 +311,7 @@ def __init__(self, config_file=None, event_selection="default", (order insensitive): * ZNE: Rotate from arbitrary components to North, East, Up - * RTZ: Rotate from ZNE to Radial, Transverse, Up (requires ZNE) + * RTZ: Rotate from ZNE to Radial, Transverse, Up * UVW: Rotate from ZNE to orthogonal UVW orientation If set to None, no rotation processing will take place. :type resample_freq: float @@ -366,30 +382,44 @@ def __init__(self, config_file=None, event_selection="default", SAC files. Legacy filenames look something like '20000101000000.NN.SSS.LL.CC.c' (event origin time, network, station, location, channel, component). Default to False - :type overwrite_event_tag: str + :type overwrite_event_tag: str or bool :param overwrite_event_tag: option to allow the user to set their own - event tag, rather than the automatically generated one + event tag, rather than the automatically generated one. + + - NoneType (default): use automatically generated event tag which + consists of event origin time and Flinn-Engdahl region + - '': empty string will dump ALL files into `output_dir`, no new + directories will be made + - str: User-defined event tag which will be created in `output_dir`, + all files will be stored in {output_dir}/{overwrite_event_tag}/* .. note:: Output file and figure control :type write_files: str or NoneType :param write_files: Which files to write out after data gathering. - Should be a comma-separated list of the following - - - weights_az: write out CAP weight file sorted by azimuth - - weights_dist: write out CAP weight file sorted by distance - - weights_code: write out CAP weight file sorted by station code - - station_list: write out a text file with station informatino - - inv: save a StationXML (.xml) file (ObsPy inventory) - - event: save a QuakeML (.xml) file (ObsPy Catalog) - - stream: save an ObsPy stream in Mseed (.ms) (ObsPy Stream) - - config_file: save YAML config file containing all input parameters - - sac: save all waveforms as SAC (.sac) files separated by component - - sac_raw: save the raw (pre-rotation) SAC files - - all: write all of the above (default value) + + 1) User-defined comma-separated list of the following + + - weights_az: write out CAP weight file sorted by azimuth + - weights_dist: write out CAP weight file sorted by distance + - weights_code: write out CAP weight file sorted by station code + - station_list: write out a text file with station information + - inv: save a StationXML (.xml) file (ObsPy inventory) + - event: save a QuakeML (.xml) file (ObsPy Catalog) + - stream: save an ObsPy stream in Mseed (.ms) (ObsPy Stream) + - config_file: save YAML config file w/ all input parameters + - sac: save all waveforms as SAC (.sac) files w/ correct headers + - sac_raw: save raw waveforms. these are straight from the + data center with no quality check and no SAC headers + - sac_zne: save only ZNE channel SAC files + - sac_rtz: save only RTZ channel SAC files + - sac_uvw: save only UVW channel SAC files + Example input: `write_files`=='inv,event,stream,sac' - If None, no files will be written. This is generally not advised. + By Default: 'inv,event,stream,sac,config_file,station_list' + 2) If NoneType or an empty string, no files will be written. + 3) If 'all', write all files listed in (1) :type plot_files: str or NoneType :param write_files: What to plot after data gathering. Should be a comma-separated list of the following: @@ -575,6 +605,10 @@ def check(self): f"`station_id` entries must have format NN.SSS.LL.CC; {id_}" ) + if self.use_mass_download is True: + logger.info("will use option `mass_download`, ignoring `client` " + "and downloading data from all available data centers") + if not (0 <= self.minazimuth <= 360): _old_val = self.minazimuth self.minazimuth = self.minazimuth % 360 @@ -616,39 +650,47 @@ def check(self): assert(self.fill_data_gaps in acceptable_fill_vals), \ f"`fill_data_gaps` must be one of {acceptable_fill_vals}" - # Enforce that `write_files` and `plot_files` are sets - if self.write_files is None: + # Enforce acceptable writing options + acceptable_write_files = self.write(_return_filenames=True) + # Empty string or NoneType translates to 'dont write anything' + if not self.write_files: self.write_files = {} + # Default behavior, write everything under the sun + elif self.write_files == "all": + self.write_files = acceptable_write_files + # User-defined, comma-separated list of values which must match + # against acceptable types else: try: self.write_files = set(self.write_files.split(",")) # TypeError thrown if we're trying to do {{*}} except TypeError: pass - - acceptable_write_files = self.write(_return_filenames=True) assert(self.write_files.issubset(acceptable_write_files)), ( f"`write_files` must be a list of some or all of: " f"{acceptable_write_files}" ) - if self.plot_files is None: + # Enforce acceptable plotting options + acceptable_plot_files = {"map", "record_section"} + # Empty string or NoneType translate to do not plot + if not self.plot_files: self.plot_files = {} + # Default behavior, plot everything under the sun + elif self.plot_files == "all": + self.plot_files = acceptable_plot_files + # User-defined, comma-separated list of values which must match + # against acceptable types else: try: self.plot_files = set(self.plot_files.split(",")) except TypeError: pass - acceptable_plot_files = {"map", "record_section", "all"} assert(self.plot_files.issubset(acceptable_plot_files)), ( f"`plot_files` must be a list of some or all of: " f"{acceptable_plot_files}" ) - if self.use_mass_download is True: - logger.info("will use option `mass_download`, ignoring `client` " - "and downloading data from all available data centers") - # Force all time boundaries to be floats to avoid rounding errors self.seconds_before_ref = float(self.seconds_before_ref) self.seconds_after_ref = float(self.seconds_after_ref) @@ -1283,11 +1325,10 @@ def _remove_response_llnl(self, st): def rotate_streams(self): """ Rotate arbitrary three-component seismograms to desired orientation - - TODO Original code had a really complicated method for rotating, - was that necesssary? Why was st.rotate() not acceptable? + 'ZNE', 'RTZ' or 'UVW'. .. warning:: + This function combines all traces, both rotated and non-rotated components (ZNE, RTZ, UVW, but not raw, e.g., 12Z), into a single stream. This is deemed okay because we don't do any @@ -1303,8 +1344,8 @@ def rotate_streams(self): st_raw = format_streams_for_rotation(st_raw) - # For writing RAW seismograms (prior to ANY rotation). Must be - # specifically requested by User by explicitely adding 'raw' to + # For writing RAW seismograms (prior to ANY rotation). Must be + # specifically requested by User by explicitely adding 'raw' to # `write_files` list if "sac_raw" in self.write_files: self.st_raw = st_raw.copy() @@ -1333,7 +1374,7 @@ def rotate_streams(self): meta = metadata_getter(_tr.id, _tr.stats.starttime) az = meta["azimuth"] dip = meta["dip"] - except Exception: + except Exception: # NOQA logger.warning(f"no matching metadata for {_tr.id}") channel_okay = False break @@ -1371,13 +1412,13 @@ def rotate_streams(self): # Rotate to radial transverse coordinate system if "RTZ" in self.rotate: logger.info("rotating to components RTZ") - # If we rotate the ENTIRE stream at once, ObsPy only uses the + # If we rotate the ENTIRE stream at once, ObsPy only uses the # first backazimuth value which will create incorrect outputs # https://github.com/obspy/obspy/issues/2623 st_rtz = st_out.copy() # contains ZNE rotated components stations = set([tr.stats.station for tr in st_rtz]) for sta in stations: - _st = st_rtz.select(station=sta, channel=cha) + _st = st_rtz.select(station=sta) if _st and hasattr(_st[0].stats, "back_azimuth"): _st.rotate(method="NE->RT") # in place rot. st_out += _st @@ -1390,7 +1431,7 @@ def rotate_streams(self): logger.info("rotating to components UVW") st_uvw = rotate_to_uvw(st_raw) st_out += st_uvw - + try: st_out = format_sac_headers_post_rotation(st_out) except AttributeError as e: @@ -1398,62 +1439,87 @@ def rotate_streams(self): return st_out - def write(self, write_files=None, _return_filenames=False, - config_fid=None, stations_fid=None, inv_fid=None, event_fid=None, - stream_fid=None, **kwargs): + def write(self, write_files=None, _return_filenames=False, _subset=None, + **kwargs): """ Write out various files specifying information about the collected stations and waveforms. Options are: - * weights_dist: write out 'weights.dat' file sorted by distance - * weights_az: write out 'weights.dat' file sorted by azimuth - * weights_code: write out 'weights.dat' file sorted by sta code + + * config_file: write the current configuration as a YAML file * station_list: write a text file with station information * inv: write the inventory as a StationXML file * event: write the event as a QuakeML file * stream: write the stream as a single MSEED file - * sac: write the stream as individual (per-channel) SAC files with - the appropriate SAC header - * config_file: write the current configuration as a YAML file - * all: ignores all other options, writes everything listed above + * sac_zne: write the stream as individual (per-channel) SAC files + for ZNE components with the appropriate SAC header + * sac_rtz: write out per-channel SAC files for RTZ components + * sac_uvw: write out per-channel SAC files for UVW components + * weights_dist: write out CAP 'weights.dat' file sorted by distance + * weights_az: write out CAP 'weights.dat' file sorted by azimuth + * weights_code: write out CAP 'weights.dat' file sorted by sta code :type write_files: list of str :param write_files: list of files that should be written out, must match the acceptable list defined in the function or here in the - docstring + docstring. If not given, defaults to internal list of files :type _return_filenames: bool :param _return_filenames: internal flag to not actually write anything but just return a list of acceptable filenames. This keeps all the file naming definitions in one function. This is only required by the check() function. - :type config_fid: str - :param config_fid: optional name for the configuration file name - defaults to 'pysep_config.yaml' - :type stations_fid: str - :param stations_fid: optional name for the stations list file name - defaults to 'station_list.txt' - :type inv_fid: str - :param inv_fid: optional name for saved ObsPy inventory object, - defaults to 'inv.xml' - :type event_fid: optional name for saved ObsPy Event object, defaults to - 'event.xml' - :type stream_fid: optional name for saved ObsPy Stream miniseed object, - defaults to 'stream.ms' + :type _subset: list + :param _subset: internal parameter used for intermediate file saving. + PySEP will attempt to save files once they have been collected + however if the files it tries to save do not match against the + User-defined file list, they will be ignored. + + Keyword Arguments + :: + str order_station_list_by: + how to order the station list available options are: + network, station, latitude, longitude, elevation, burial. + str config_fid: + optional name for the configuration file name defaults to + 'pysep_config.yaml' + str station_fid: + optional name for the stations list file name defaults to + 'station_list.txt' + str inv_fid: + optional name for saved ObsPy inventory object, defaults to + 'inv.xml' + str event_fid: + optional name for saved ObsPy Event object, defaults to + 'event.xml' + str stream_fid: + optional name for saved ObsPy Stream miniseed object, + defaults to 'stream.ms' + str sac_subdir: + sub-directory within output directory and event directory to + save SAC files. Defaults to SAC/. Use an empty string to dump + files directly into the event directory """ - # Collect kwargs for writing - order_stations_list_by = kwargs.get("order_stations_list_by", None) + # Set some default values that can be overridden with kwargs + order_station_list_by = self.kwargs.get("order_station_list_by", None) + config_fid = self.kwargs.get("config_fid", "pysep_config.yaml") + station_fid = self.kwargs.get("station_fid", "station_file.txt") + inv_fid = self.kwargs.get("inv_fid", "inv.xml") + event_fid = self.kwargs.get("event_fid", "event.xml") + stream_fid = self.kwargs.get("stream_fid", "stream.ms") + sac_subdir = self.kwargs.get("sac_subdir", "SAC") # This is defined here so that all these filenames are in one place, # but really this set is just required by check(), not by write() _acceptable_files = {"weights_az", "weights_dist", "weights_code", "station_list", "inv", "event", "stream", - "config_file", "sac", "sac_raw", "all"} + "config_file", "sac", "sac_raw", "sac_zne", + "sac_rtz", "sac_uvw"} if _return_filenames: return _acceptable_files # Allow the user to call write() with their own set of filenames if this - # wasn't defined by the config or this is being scripted and they only + # wasn't defined by the Config or this is being scripted and they only # want certain files out at intermediate steps if write_files is None: write_files = self.write_files @@ -1464,42 +1530,44 @@ def write(self, write_files=None, _return_filenames=False, f"{_acceptable_files}" ) + # Allow for internal intermediate file saving validated against + # user-defined list + if _subset: + write_files = write_files.intersection(set(_subset)) + for weights_fid in ["weights_dist", "weights_az", "weights_code"]: - if weights_fid in write_files or "all" in write_files: + if weights_fid in write_files: order_by = weights_fid.split("_")[1] write_cap_weights_files(st=self.st, order_by=order_by, path_out=self.output_dir) - if "config_file" in write_files or "all" in write_files: + if "config_file" in write_files: logger.info("writing config YAML file") - fid = os.path.join(self.output_dir, - config_fid or f"pysep_config.yaml") - self.write_config(fid=fid) + self.write_config(fid=os.path.join(self.output_dir, config_fid)) if "station_list" in write_files or "all" in write_files: - fid = os.path.join(self.output_dir, - stations_fid or "stations_list.txt") + fid = os.path.join(self.output_dir, station_fid) logger.info("writing stations file") logger.debug(fid) - write_pysep_stations_file( + write_pysep_station_file( self.inv, self.event, fid, - order_stations_list_by=order_stations_list_by + order_station_list_by=order_station_list_by ) - if "inv" in write_files or "all" in write_files: - fid = os.path.join(self.output_dir, inv_fid or f"inv.xml") + if "inv" in write_files: + fid = os.path.join(self.output_dir, inv_fid) logger.info("writing inventory as StationXML") logger.debug(fid) self.inv.write(fid, format="STATIONXML") - if "event" in write_files or "all" in write_files: - fid = os.path.join(self.output_dir, event_fid or f"event.xml") + if "event" in write_files: + fid = os.path.join(self.output_dir, event_fid) logger.info("writing event as QuakeML") logger.debug(fid) self.event.write(fid, format="QuakeML") - if "stream" in write_files or "all" in write_files: - fid = os.path.join(self.output_dir, stream_fid or f"stream.ms") + if "stream" in write_files: + fid = os.path.join(self.output_dir, stream_fid) logger.info("writing waveform stream in MiniSEED") logger.debug(fid) with warnings.catch_warnings(): @@ -1507,36 +1575,72 @@ def write(self, write_files=None, _return_filenames=False, warnings.simplefilter("ignore") self.st.write(fid, format="MSEED") - if "sac" in write_files or "all" in write_files: - logger.info("writing each waveform trace in SAC format") - # Old PySEP dumped all files into output dir. New PySEP makes subdir - if self._legacy_naming: - _output_dir = self.output_dir - else: - _output_dir = os.path.join(self.output_dir, "SAC") + # Used for determining where to save SAC files + if self._legacy_naming: + _output_dir = self.output_dir + else: + _output_dir = os.path.join(self.output_dir, sac_subdir) + + if "sac_raw" in write_files: + logger.info("writing RAW waveforms traces in SAC format") + self._write_sac(st=self.st_raw, + output_dir=os.path.join(_output_dir, "RAW")) + + if "sac" in write_files: + logger.info("writing all waveforms traces in SAC format") self._write_sac(st=self.st, output_dir=_output_dir) - # Write RAW sac files - if self.st_raw is not None: - self._write_sac(st=self.st_raw, - output_dir=os.path.join(_output_dir, "RAW")) + # Allow outputting only certain components. If used together with 'sac', + # probably these will overwrite already written files + if "sac_zne" in write_files: + logger.info("writing ZNE waveforms traces in SAC format") + self._write_sac(st=self.st, output_dir=_output_dir, + components="ZNE") + + if "sac_rtz" in write_files: + logger.info("writing RTZ waveforms traces in SAC format") + self._write_sac(st=self.st, output_dir=_output_dir, + components="RTZ") + + if "sac_uvw" in write_files: + logger.info("writing UVW waveforms traces in SAC format") + self._write_sac(st=self.st, output_dir=_output_dir, + components="UVW") - def _write_sac(self, st, output_dir=os.getcwd()): + def _write_sac(self, st, output_dir=os.getcwd(), components=None): """ Write SAC files with a specific naming schema, which allows for both legacy (old PySEP) or non-legacy (new PySEP) naming. + + :type st: obspy.core.stream.Stream + :param st: Stream to be written + :type output_dir: str + :param output_dir: where to save the SAC files, defaults to the + current working directory + :type components: str + :param components: acceptable component values for saving files, + allows only saving subsets of the Stream. Example 'RTZNE' or + just 'R'. Must match against Trace.stats.component """ if not os.path.exists(output_dir): os.makedirs(output_dir) for tr in st: + if components and tr.stats.component not in components: + continue if self._legacy_naming: # Legacy: e.g., 20000101000000.NN.SSS.LL.CC.c _trace_id = f"{tr.get_id()[:-1]}.{tr.get_id()[-1].lower()}" - tag = f"{self.event_tag}.{_trace_id}" + if self.event_tag: + tag = f"{self.event_tag}.{_trace_id}" + else: + tag = _trace_id else: # New style: e.g., 2000-01-01T000000.NN.SSS.LL.CCC.sac - tag = f"{self.event_tag}.{tr.get_id()}.sac" + if self.event_tag: + tag = f"{self.event_tag}.{tr.get_id()}.sac" + else: + tag = f"{tr.get_id()}.sac" fid = os.path.join(output_dir, tag) logger.debug(os.path.basename(fid)) @@ -1551,6 +1655,7 @@ def write_config(self, fid=None): :type fid: str :param fid: name of the file to write. defaults to config.yaml + :param fid: name of the file to write. defaults to config.yaml """ if fid is None: fid = f"pysep_config.yaml" @@ -1562,7 +1667,7 @@ def write_config(self, fid=None): if not key.startswith("_")} # Internal attributes that don't need to go into the written config attr_remove_list = ["st", "st_raw", "event", "inv", "c", "write_files", - "plot_files", "output_dir", "station_ids"] + "plot_files", "output_dir", "station_ids", "kwargs"] if self.client.upper() != "LLNL": attr_remove_list.append("llnl_db_path") # not important unless LLNL @@ -1615,27 +1720,41 @@ def _event_tag_and_output_dir(self): :rtype: tuple of str :return: (unique event tag, path to output directory) """ - # Options for choosing how to name things. Legacy, manual or new-style - if self._legacy_naming: - logger.debug("reverting to legacy style file naming") - event_tag = format_event_tag_legacy(self.event) + # Default behavior, auto-generate event tag + if self._overwrite_event_tag is None: + # Options for choosing how to name things. Legacy or new-style + if self._legacy_naming: + logger.debug("reverting to legacy style file naming") + event_tag = format_event_tag_legacy(self.event) + else: + event_tag = format_event_tag(self.event) + # Either User turns off event tag so dump files directly to `output_dir` + # or User defines their own `event_tag` else: - event_tag = format_event_tag(self.event) - - if self._overwrite_event_tag: - logger.debug("overwriting automatically generated event tag") event_tag = self._overwrite_event_tag - logger.info(f"event tag is: {event_tag}") full_output_dir = os.path.join(self.output_dir, event_tag) + logger.info(f"full output directory is: {full_output_dir}") + if not os.path.exists(full_output_dir): os.makedirs(full_output_dir) elif not self._overwrite: logger.warning(f"output directory '{full_output_dir}' exists and " f"overwrite flag (-o/--overwrite) not set, exiting") sys.exit(0) + return event_tag, full_output_dir + def _set_log_file(self): + """ + Write logger to file as well as stdout, with the same format as the + stdout logger + """ + log_file = self.kwargs.get("log_file", "pysep.log") + fh = logging.FileHandler(os.path.join(self.output_dir, log_file)) + fh.setFormatter(logger.handlers[0].formatter) + logger.addHandler(fh) + def run(self, event=None, inv=None, st=None, **kwargs): """ Run PySEP: Seismogram Extraction and Processing. Steps in order are: @@ -1659,6 +1778,7 @@ def run(self, event=None, inv=None, st=None, **kwargs): :param st: optional user-provided strean object which will force a skip over waveform searching """ + self._set_log_file() logger.debug(f"running PySEP version {__version__}") # Overload default parameters with event input file and check validity @@ -1666,40 +1786,51 @@ def run(self, event=None, inv=None, st=None, **kwargs): self.check() self.c = self.get_client() - # Get metadata (QuakeML, StationXML) + # Get QuakeML (event) metadata if event is None: self.event = self.get_event() else: self.event = event - self.event_tag, self.output_dir = self._event_tag_and_output_dir() - # Standard method of retrieving waveforms from data center + # Intermediate write of Config file and QuakeML + self.write(_subset=["config_file", "event"], **kwargs) + + # Default method of retrieving waveforms/metadata from data center if self.use_mass_download is False: if inv is None: self.inv = self.get_stations() else: self.inv = inv self.inv = self.curtail_stations() + self.write(_subset=["inv"], **kwargs) # write out inventory # Get waveforms, format and assess waveform quality if st is None: - self.st = self.get_waveforms() + self.st_raw = self.get_waveforms() else: - self.st = st + self.st_raw = st # Use ObsPy's mass download option to gather all available data else: - self.st, self.inv = self.mass_download() + self.st_raw, self.inv = self.mass_download() + # Intermediate write of StationXML and raw waveforms + self.write(_subset=["sac_raw", "inv", "station_list"], **kwargs) + + # Quality check and process the raw waveforms. `st` is an intermediate + # attribute and will NOT be written self.st = quality_check_waveforms_before_processing( - self.st, remove_clipped=self.remove_clipped + self.st_raw, remove_clipped=self.remove_clipped ) + # Mark `st_raw` for deletion because we no longer need raw data. If the + # user wants it, it should have been written out + del self.st_raw + self.st = append_sac_headers(self.st, self.event, self.inv) if self.taup_model is not None: self.st = format_sac_header_w_taup_traveltimes(self.st, self.taup_model, self.phase_list) - # Waveform preprocessing and standardization self.st = self.preprocess() @@ -1713,8 +1844,10 @@ def run(self, event=None, inv=None, st=None, **kwargs): self.st, remove_insufficient_length=self.remove_insufficient_length ) - # Generate outputs for user consumption - self.write(**{**kwargs, **self.kwargs}) + # Write out the remainder files and make figures for user consumption + self.write(_subset=["stream", "sac", "sac_zne", "sac_rtz", "sac_uvw", + "weights_dist", "weights_az", "weights_code"], + **kwargs) self.plot() @@ -1760,7 +1893,7 @@ def parse_args(): parser.add_argument("--legacy_naming", default=False, action="store_true", help="use the file naming schema and directory " "structure of the legacy version of PySEP.") - parser.add_argument("--overwrite_event_tag", default="", type=str, + parser.add_argument("--overwrite_event_tag", default=None, type=str, nargs="?", help="Manually set the event tag used to name the " "output directory and SAC files. If None, will " diff --git a/pysep/recsec.py b/pysep/recsec.py index 4329847..4ce83e0 100755 --- a/pysep/recsec.py +++ b/pysep/recsec.py @@ -1521,8 +1521,10 @@ def plot(self, subset=None, page_num=None, **kwargs): self.kwargs[name] = val self.ax = set_plot_aesthetic(ax=self.ax, **self.kwargs) - # Partition the figure by user-specified azimuth bins - if self.sort_by and "azimuth" in self.sort_by: + # Partition the figure by user-specified azimuth bins for relative + # (back)azimuth sorting only (not absolute) + if self.sort_by and ("azimuth" in self.sort_by) and \ + ("abs_" not in self.sort_by): self._plot_azimuth_bins(start=start, stop=stop) # Finalize the figure accoutrements diff --git a/pysep/utils/io.py b/pysep/utils/io.py index 7360a5e..e1f5a15 100644 --- a/pysep/utils/io.py +++ b/pysep/utils/io.py @@ -302,7 +302,7 @@ def _get_resource_id(name, res_type, tag=None): with open(path_to_cmtsolution, "r") as f: lines = f.readlines() - # First line contains meta informatino about event + # First line contains meta information about event _, year, month, day, hour, minute, sec, lat, lon, depth, mb, ms, _ = \ lines[0].strip().split() @@ -559,8 +559,8 @@ def write_sem(st, unit, path="./", time_offset=0): np.savetxt(fid, data, ["%13.7f", "%17.7f"]) -def write_pysep_stations_file(inv, event, fid="./stations_list.txt", - order_stations_list_by=None): +def write_pysep_station_file(inv, event, fid="./station_list.txt", + order_station_list_by=None): """ Write a list of station codes, distances, etc. useful for understanding characterization of all collected stations @@ -572,20 +572,20 @@ def write_pysep_stations_file(inv, event, fid="./stations_list.txt", :param inv: optional user-provided inventory object which will force a skip over StationXML/inventory searching :type fid: str - :param fid: name of the file to write to. defaults to ./stations_list.txt - :type order_by: str - :param order_by: how to order the stations written to file. Available - are: network, station, latitude, longitude, elevation, burial. + :param fid: name of the file to write to. defaults to ./station_list.txt + :type order_station_list_by: str + :param order_station_list_by: how to order the stations written to file. + Available are: network, station, latitude, longitude, elevation, burial. If not given, order is determined by the internal sorting of the input Inventory """ # Key indices correspond to stations list keys = ["station", "network", "latitude", "longitude", "distance", "azimuth"] - if order_stations_list_by and order_stations_list_by not in keys: - logger.warning(f"`order_stations_by` must be in {keys}, " + if order_station_list_by and order_station_list_by not in keys: + logger.warning(f"`order_station_by` must be in {keys}, " f"setting default") - order_stations_by = None + order_station_by = None event_latitude = event.preferred_origin().latitude event_longitude = event.preferred_origin().longitude @@ -603,8 +603,8 @@ def write_pysep_stations_file(inv, event, fid="./stations_list.txt", dist_km, az]) # Set the order of the station file based on the order of keys - if order_stations_list_by: - idx = keys.index(order_stations_list_by) + if order_station_list_by: + idx = keys.index(order_station_list_by) stations.sort(key=lambda x: x[idx]) with open(fid, "w") as f: @@ -678,6 +678,7 @@ def write_stations_file(inv, fid="./STATIONS", order_by=None, f"{s[4]:7.1f}{s[5]:7.1f}\n" ) + def write_cat_to_event_list(cat, fid_out="event_input.txt"): """ Writes out an event Catalog (ObsPy object) information to an ASCII file that