orcasound · valentina-s · Aug 16, 2021 · Jul 19, 2021 · Jul 19, 2021 · Jul 19, 2021
diff --git a/.github/workflows/ci.yml → .github/workflows/code_quality.yml b/.github/workflows/ci.yml → .github/workflows/code_quality.yml
@@ -1,4 +1,4 @@
-name: CI
+name: Code Quality
 
 on:
   push:

diff --git a/.github/workflows/ooi_processing.yml b/.github/workflows/ooi_processing.yml
@@ -3,10 +3,30 @@ name: OOI processing
 on:
   # To run manually
   workflow_dispatch:
+    inputs:
+      node:
+        description: 'Alphanumeric node id (e.g. PC01A)'
+        required: false
+        default: 'PC01A'
+      start_time:
+        description: 'start_time formatted as Y-m-dTH-M-S'
+        required: false
+      end_time:
+        description: 'end_time formatted as Y-m-dTH-M-S'
+        required: false
+      segment_length:
+        description: 'Segment length in minutes (float)'
+        required: false
+        default: 5.0
   # Run workflow at 12:00 UTC every day
   schedule:
   - cron: '0 12 * * *'
 
+env:
+  NODE: ${{ github.event.inputs.node || 'PC01A' }}
+  SEGMENT_LENGTH: ${{ github.event.inputs.segment_length || 5 }}
+  OUTPUT_DIR: spectrograms
+
 jobs:
   process:
     runs-on: ubuntu-latest
@@ -24,13 +44,22 @@ jobs:
         python -m pip install -U setuptools wheel
         python -m pip install ooipy
 
-    - name: Execute Python script
+    # One step serves manual and scheduled runs. The dispatch inputs reach the
+    # script through environment variables instead of being expanded into the
+    # shell text, and -s / -e are appended only when the matching input is
+    # non-empty, so ooi_processing.py applies its own default for any bound
+    # that was not supplied (inputs are empty on scheduled runs).
+    - name: Create spectrograms
+      env:
+        START_TIME: ${{ github.event.inputs.start_time }}
+        END_TIME: ${{ github.event.inputs.end_time }}
       run: |
-        python ooi_processing.py
+        args=(-o "$OUTPUT_DIR" --node "$NODE" -l "$SEGMENT_LENGTH")
+        if [ -n "$START_TIME" ]; then args+=(-s "$START_TIME"); fi
+        if [ -n "$END_TIME" ]; then args+=(-e "$END_TIME"); fi
+        python ooi_processing.py "${args[@]}"
 
     - name: Upload spectrograms
       uses: actions/upload-artifact@v2
       with:
         name: Spectrograms
         path: |
-          *.png
+          ${{ env.OUTPUT_DIR }}/*.png
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,24 @@
+# Runs the pytest suite on every push and pull request; workflow_dispatch
+# additionally allows starting it manually.
+name: Tests
+
+on: [push, pull_request, workflow_dispatch]
+
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - uses: actions/setup-python@v2
+        with:
+          # Any Python 3 release; the minor version is not pinned.
+          python-version: '3.x'
+
+      - name: Install dependencies
+        # ooipy is required because the tests import ooi_processing, which
+        # imports ooipy.request at module level.
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade setuptools wheel
+          python -m pip install pytest ooipy
+
+      - name: Run tests
+        run: |
+          python -m pytest
diff --git a/create_spectrogram.py b/create_spectrogram.py
@@ -1,10 +1,27 @@
 import argparse
 import logging
+from os import path
+from pathlib import Path
 
 import matplotlib.pyplot as plt
 from scipy.io import wavfile
 
 
+def create_spec_name(wav_name, output_dir=None):
+    """Builds the spectrogram path for a .wav file, optionally inside `output_dir`.
+
+    Args:
+        `wav_name`: Path to the input .wav file; only its base name is used.
+        `output_dir`: Path to the output directory. When omitted, the result has no directory part.
+    Returns:
+        Path to the output spectrogram: the .wav base name with a .png extension.
+    """
+    stem, _ = path.splitext(path.basename(wav_name))
+    if output_dir is None:
+        return f"{stem}.png"
+    return path.join(path.normpath(output_dir), f"{stem}.png")
+
+
 def plot_psd(data, samplerate, nfft=256, noverlap=128):
     """Plots power spectral density spectrogram.
 
@@ -28,12 +45,12 @@ def save_spectrogram(input_wav, plot_path=None, nfft=256):
         `plot_path`: Path to the output spectrogram file. Default is `input_wav` with .png extension.
         `nfft`: The number of data points used in each block for the FFT. A power 2 is most efficient.
     Returns:
-        None
+        Path to the spectrogram.
     """
     samplerate, data = wavfile.read(input_wav)
     noverlap = nfft // 2 if nfft <= 128 else 128
 
-    title = input_wav.removesuffix(".wav")
+    title = path.splitext(path.basename(input_wav))[0]
     plt.title(title)
     if len(data.shape) == 1:
         plot_psd(data, samplerate, nfft, noverlap)
@@ -49,12 +66,15 @@ def save_spectrogram(input_wav, plot_path=None, nfft=256):
     plt.xlabel("Time [s]")
 
     if plot_path is None:
-        plot_path = input_wav.replace(".wav", ".png")
+        plot_path = f"{path.splitext(input_wav)[0]}.png"
+    else:
+        Path(path.dirname(plot_path)).mkdir(parents=True, exist_ok=True)
     plt.savefig(plot_path)
 
     plt.cla()
     plt.close("all")
     logging.info("Finished " + input_wav)
+    return plot_path
 
 
 if __name__ == "__main__":

diff --git a/ooi_processing.py b/ooi_processing.py
@@ -1,35 +1,124 @@
+import argparse
 import datetime
 import logging
 import os
 import sys
 
 from ooipy.request import hydrophone_request
 
-from create_spectrogram import save_spectrogram
+from create_spectrogram import create_spec_name, save_spectrogram
 
-logging.basicConfig(
-    format="%(levelname)s:%(message)s", stream=sys.stdout, level=logging.INFO
-)
 
-end_time = datetime.datetime.combine(
-    datetime.datetime.today(), datetime.datetime.min.time()
-)
-start_time = end_time - datetime.timedelta(days=1)
-segment_length = datetime.timedelta(minutes=5)
-node = "PC01A"
+def save_ooi_spectrograms(
+    start_time,
+    end_time,
+    segment_length=datetime.timedelta(minutes=5),
+    node="PC01A",
+    output_dir="spectrograms",
+    nfft=256,
+):
+    """
+    Creates spectrograms for each time segment in the specified time range.
 
-while start_time < end_time:
-    segment_end = min(start_time + segment_length, end_time)
-    hydrophone_data = hydrophone_request.get_acoustic_data(
-        start_time, segment_end, node, verbose=True
-    )
-    if hydrophone_data is None:
-        logging.info(f"Could not get data from {start_time} to {segment_end}")
+    The range is walked in steps of `segment_length`; the last segment is cut
+    short at `end_time`. Segments for which no data is returned are logged and
+    skipped. Nothing is requested or written when `start_time` is not before
+    `end_time`.
+
+    Args:
+        `start_time`: `datetime.datetime`, beginning of the range.
+        `end_time`: `datetime.datetime`, end of the range.
+        `segment_length`: `datetime.timedelta`, must be positive.
+        `node`: One of the preset OOI nodes.
+        `output_dir`: Path to the output directory.
+        `nfft`: The number of data points used in each block for the FFT. A power of 2 is most efficient.
+    Returns:
+        None
+    Raises:
+        ValueError: If `segment_length` is zero or negative.
+    """
+    # A non-positive step would never advance `start_time` and loop forever.
+    if segment_length <= datetime.timedelta(0):
+        raise ValueError(f"segment_length must be positive, got {segment_length}")
+    while start_time < end_time:
+        segment_end = min(start_time + segment_length, end_time)
+        hydrophone_data = hydrophone_request.get_acoustic_data(
+            start_time, segment_end, node, verbose=True
+        )
+        if hydrophone_data is None:
+            logging.info(f"Could not get data from {start_time} to {segment_end}")
+            start_time = segment_end
+            continue
+        # Name the files after the segment start, trimmed to milliseconds.
+        datestr = start_time.strftime("%Y-%m-%dT%H-%M-%S-%f")[:-3]
+        wav_name = f"{datestr}.wav"
+        try:
+            hydrophone_data.wav_write(wav_name)
+            spec_fname = create_spec_name(wav_name, output_dir)
+            save_spectrogram(wav_name, spec_fname, nfft)
+        finally:
+            # The .wav is only an intermediate file; remove it even when
+            # writing or plotting fails, without masking the original error.
+            if os.path.exists(wav_name):
+                os.remove(wav_name)
         start_time = segment_end
-        continue
-    datestr = start_time.strftime("%Y-%m-%dT%H-%M-%S-%f")[:-3]
-    wav_name = f"{datestr}.wav"
-    hydrophone_data.wav_write(wav_name)
-    save_spectrogram(wav_name)
-    os.remove(wav_name)
-    start_time = segment_end
+
+
+if __name__ == "__main__":
+    # Layout of the -s/--start_time and -e/--end_time values.
+    time_format = "%Y-%m-%dT%H-%M-%S"
+
+    def parse_time(value):
+        """Converts a command-line timestamp to `datetime.datetime`.
+
+        Raising `argparse.ArgumentTypeError` makes argparse report a malformed
+        value as a usage error instead of an uncaught `ValueError` traceback.
+        """
+        try:
+            return datetime.datetime.strptime(value, time_format)
+        except ValueError as err:
+            raise argparse.ArgumentTypeError(
+                f"{value!r} is not formatted as Y-m-dTH-M-S"
+            ) from err
+
+    logging.basicConfig(
+        format="%(levelname)s:%(message)s", stream=sys.stdout, level=logging.INFO
+    )
+    parser = argparse.ArgumentParser(
+        description="Creates spectrogram for each segment."
+    )
+    parser.add_argument(
+        "--node",
+        help="Alphanumeric node id (e.g. PC01A)",
+        default="PC01A",
+        choices=[
+            "LJ01D",
+            "LJ01A",
+            "PC01A",
+            "PC03A",
+            "LJ01C",
+            "LJ03A",
+            "AXABA1",
+            "AXCC1",
+            "AXEC2",
+            "HYS14",
+            "HYSB1",
+        ],
+    )
+    parser.add_argument(
+        "-s",
+        "--start_time",
+        type=parse_time,
+        help="Start time formatted as Y-m-dTH-M-S",
+    )
+    parser.add_argument(
+        "-e",
+        "--end_time",
+        type=parse_time,
+        help="End time formatted as Y-m-dTH-M-S",
+    )
+    parser.add_argument(
+        "-l",
+        "--segment_length",
+        type=float,
+        default=5,
+        help="Segment length in minutes. Default is %(default)s.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        default="spectrograms",
+        help="Path to the output directory for spectrograms. Default is %(default)s.",
+    )
+    parser.add_argument(
+        "-n",
+        "--nfft",
+        type=int,
+        default=256,
+        help="The number of data points used in each block for the FFT. A power 2 is most efficient. Default is %(default)s.",
+    )
+    args = parser.parse_args()
+
+    # Zero or negative minutes would never advance through the time range.
+    if args.segment_length <= 0:
+        parser.error("segment length must be positive")
+
+    # Without --end_time, process up to 00:00 of the current (local) day.
+    end_time = args.end_time
+    if end_time is None:
+        end_time = datetime.datetime.combine(
+            datetime.datetime.today(), datetime.datetime.min.time()
+        )
+
+    # Without --start_time, cover the 24 hours leading up to `end_time`.
+    start_time = args.start_time
+    if start_time is None:
+        start_time = end_time - datetime.timedelta(days=1)
+
+    segment_length = datetime.timedelta(minutes=args.segment_length)
+
+    save_ooi_spectrograms(
+        start_time, end_time, segment_length, args.node, args.output, args.nfft
+    )
diff --git a/orcasound_processing.py b/orcasound_processing.py
@@ -9,7 +9,7 @@
 import ffmpeg
 import m3u8
 
-from create_spectrogram import save_spectrogram
+from create_spectrogram import create_spec_name, save_spectrogram
 
 
 def convert_with_ffmpeg(input_file, output_file):
@@ -37,7 +37,8 @@ def create_readable_name(directory, timestamp):
 
 
 def convert2wav(input_dir, output_dir):
-    """Converts all `.ts` files from `live.m3u8` to `.wav`.
+    """
+    Converts all `.ts` files from `live.m3u8` to `.wav`.
 
     All files will have the following format: `%Y-%m-%dT%H-%M-%S.wav`
 
@@ -80,17 +81,13 @@ def convert2wav(input_dir, output_dir):
         "-n",
         "--nfft",
         type=int,
-        help="The number of data points used in each block for the FFT. A power 2 is most efficient. Default is 256.",
         default=256,
+        help="The number of data points used in each block for the FFT. A power 2 is most efficient. Default is %(default)s.",
     )
     args = parser.parse_args()
 
     convert2wav(path.normpath(args.input_dir), "wav")
 
     for input_wav in sorted(glob.glob("wav/*.wav")):
-        output_fname = None
-        if args.output is not None:
-            Path(args.output).mkdir(parents=True, exist_ok=True)
-            file_name = path.splitext(path.basename(input_wav))[0]
-            output_fname = f"{path.normpath(args.output)}/{file_name}"
+        output_fname = create_spec_name(input_wav, args.output)
         save_spectrogram(input_wav, output_fname, args.nfft)
diff --git a/tests/ooi.wav b/tests/ooi.wav
diff --git a/tests/ooi_example.png b/tests/ooi_example.png
diff --git a/tests/orcasound.wav b/tests/orcasound.wav
diff --git a/tests/orcasound_example.png b/tests/orcasound_example.png
diff --git a/tests/test_ooi.py b/tests/test_ooi.py
@@ -0,0 +1,49 @@
+"""Unit tests for OOI workflow"""
+import datetime
+import os
+import shutil
+from os import path
+
+import pytest
+
+from ooi_processing import save_ooi_spectrograms
+
+
+@pytest.mark.parametrize(
+    "start_time_str, end_time_str, segment_length_float, node, output_dir, expected_files_count",
+    [
+        ("2017-03-10T00-00-00", "2017-03-10T00-05-00", None, None, None, 1),
+        ("2017-03-10T00-00-00", "2017-03-10T00-05-00", 1, None, None, 5),
+        ("2017-03-10T00-00-00", "2017-03-10T00-05-00", 1, None, "long/path/", 5),
+        ("2017-03-10T00-05-00", "2017-03-10T00-00-00", 1, None, None, 0),
+    ],
+)
+def test_ooi_spectrograms(
+    tmp_path,
+    start_time_str,
+    end_time_str,
+    segment_length_float,
+    node,
+    output_dir,
+    expected_files_count,
+):
+    """Checks how many spectrogram files are written for a time range.
+
+    A `None` parameter stands for the corresponding default (5-minute
+    segments, node PC01A, directory "spectrograms"). When no files are
+    expected, the output directory must not be created at all.
+    """
+    start_time = datetime.datetime.strptime(start_time_str, "%Y-%m-%dT%H-%M-%S")
+    end_time = datetime.datetime.strptime(end_time_str, "%Y-%m-%dT%H-%M-%S")
+    if segment_length_float is None:
+        segment_length_float = 5.0
+    if node is None:
+        node = "PC01A"
+    if output_dir is None:
+        output_dir = "spectrograms"
+    # Write below pytest's per-test temporary directory so that every case
+    # starts from a clean slate: a failing case can no longer leave a
+    # directory behind that breaks the "directory must not exist" case.
+    output_dir = str(tmp_path / output_dir)
+    segment_length = datetime.timedelta(minutes=segment_length_float)
+    save_ooi_spectrograms(start_time, end_time, segment_length, node, output_dir)
+    if expected_files_count > 0:
+        created_files = [
+            name
+            for name in os.listdir(output_dir)
+            if path.isfile(path.join(output_dir, name))
+        ]
+        assert len(created_files) == expected_files_count
+    else:
+        assert not path.exists(output_dir)