Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OOI inputs #36

Merged
merged 23 commits into from
Aug 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: CI
name: Code Quality

on:
push:
Expand Down
35 changes: 32 additions & 3 deletions .github/workflows/ooi_processing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,30 @@ name: OOI processing
on:
# To run manually
workflow_dispatch:
inputs:
node:
description: 'Alphanumeric node id (e.g. PC01A)'
required: false
default: 'PC01A'
start_time:
description: 'start_time formatted as Y-m-dTH-M-S'
required: false
end_time:
description: 'end_time formatted as Y-m-dTH-M-S'
required: false
segment_length:
description: 'Segment length in minutes (float)'
required: false
default: 5.0
# Run workflow at 12:00 UTC every day
schedule:
- cron: '0 12 * * *'

env:
NODE: ${{ github.event.inputs.node || 'PC01A' }}
SEGMENT_LENGTH: ${{ github.event.inputs.segment_length || 5 }}
OUTPUT_DIR: spectrograms

jobs:
process:
runs-on: ubuntu-latest
Expand All @@ -24,13 +44,22 @@ jobs:
python -m pip install -U setuptools wheel
python -m pip install ooipy

- name: Execute Python script
- name: Custom timeframe
if: ${{ github.event.inputs.start_time }}
run: |
python ooi_processing.py -o ${{ env.OUTPUT_DIR }} --node ${{ env.NODE }} \
-l ${{ env.SEGMENT_LENGTH }} -s ${{ github.event.inputs.start_time }} \
-e ${{ github.event.inputs.end_time }}

- name: Default timeframe
if: ${{ !github.event.inputs.start_time }}
run: |
python ooi_processing.py
python ooi_processing.py -o ${{ env.OUTPUT_DIR }} --node ${{ env.NODE }} \
-l ${{ env.SEGMENT_LENGTH }}

- name: Upload spectrograms
uses: actions/upload-artifact@v2
with:
name: Spectrograms
path: |
*.png
${{ env.OUTPUT_DIR }}/*.png
24 changes: 24 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Workflow that installs the project's test dependencies and runs pytest.
name: Tests

# Triggered by pushes, pull requests, and manual dispatch.
on:
  push:
  pull_request:
  workflow_dispatch:

jobs:
  tests:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2

      - uses: actions/setup-python@v2
        with:
          python-version: '3.x'

      # pip, setuptools and wheel are upgraded before the packages are installed.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade setuptools wheel
          python -m pip install pytest ooipy

      - name: Run tests
        run: |
          python -m pytest
26 changes: 23 additions & 3 deletions create_spectrogram.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@
import argparse
import logging
from os import path
from pathlib import Path

import matplotlib.pyplot as plt
from scipy.io import wavfile


def create_spec_name(wav_name, output_dir=None):
    """Builds the spectrogram file path that corresponds to an input .wav file.

    The directory part and the extension of `wav_name` are dropped; the
    remaining base name gets a `.png` extension and, when `output_dir` is
    given, is placed inside the normalized `output_dir`.

    Args:
        `wav_name`: Path to the input .wav file.
        `output_dir`: Path to the output directory. When `None`, only the
            bare file name is returned.
    Returns:
        Path to the output spectrogram.
    """
    stem, _extension = path.splitext(path.basename(wav_name))
    if output_dir is None:
        return f"{stem}.png"

    # normpath collapses trailing/duplicate separators such as "long/path/".
    target_dir = path.normpath(output_dir)
    return f"{path.join(target_dir, stem)}.png"


def plot_psd(data, samplerate, nfft=256, noverlap=128):
"""Plots power spectral density spectrogram.

Expand All @@ -28,12 +45,12 @@ def save_spectrogram(input_wav, plot_path=None, nfft=256):
`plot_path`: Path to the output spectrogram file. Default is `input_wav` with .png extension.
`nfft`: The number of data points used in each block for the FFT. A power 2 is most efficient.
Returns:
None
Path to the spectrogram.
"""
samplerate, data = wavfile.read(input_wav)
noverlap = nfft // 2 if nfft <= 128 else 128

title = input_wav.removesuffix(".wav")
title = path.splitext(path.basename(input_wav))[0]
plt.title(title)
if len(data.shape) == 1:
plot_psd(data, samplerate, nfft, noverlap)
Expand All @@ -49,12 +66,15 @@ def save_spectrogram(input_wav, plot_path=None, nfft=256):
plt.xlabel("Time [s]")

if plot_path is None:
plot_path = input_wav.replace(".wav", ".png")
plot_path = f"{path.splitext(input_wav)[0]}.png"
else:
Path(path.dirname(plot_path)).mkdir(parents=True, exist_ok=True)
plt.savefig(plot_path)

plt.cla()
plt.close("all")
logging.info("Finished " + input_wav)
return plot_path


if __name__ == "__main__":
Expand Down
137 changes: 113 additions & 24 deletions ooi_processing.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,124 @@
import argparse
import datetime
import logging
import os
import sys

from ooipy.request import hydrophone_request

from create_spectrogram import save_spectrogram
from create_spectrogram import create_spec_name, save_spectrogram

logging.basicConfig(
format="%(levelname)s:%(message)s", stream=sys.stdout, level=logging.INFO
)

end_time = datetime.datetime.combine(
datetime.datetime.today(), datetime.datetime.min.time()
)
start_time = end_time - datetime.timedelta(days=1)
segment_length = datetime.timedelta(minutes=5)
node = "PC01A"
def save_ooi_spectrograms(
    start_time,
    end_time,
    segment_length=datetime.timedelta(minutes=5),
    node="PC01A",
    output_dir="spectrograms",
    nfft=256,
):
    """
    Creates spectrograms for each time segment in the specified time range.

    The range from `start_time` to `end_time` is walked in steps of
    `segment_length`; the last step is shortened so it never goes past
    `end_time`. For every step the audio is requested with
    `hydrophone_request.get_acoustic_data`, written to a temporary .wav file
    in the current working directory, turned into a spectrogram with
    `save_spectrogram`, and the .wav file is removed again. Steps for which
    no data is returned are logged and skipped. Nothing is done when
    `start_time` is not earlier than `end_time`.

    Args:
        `start_time`: `datetime.datetime`
        `end_time`: `datetime.datetime`
        `segment_length`: `datetime.timedelta`, must be positive.
        `node`: One of the preset OOI nodes.
        `output_dir`: Path to the output directory.
        `nfft`: The number of data points used in each block for the FFT. A power of 2 is most efficient.
    Returns:
        None
    Raises:
        ValueError: If `segment_length` is zero or negative.
    """
    # A non-positive step would never move `start_time` forward, so the loop
    # below would not terminate.
    if segment_length <= datetime.timedelta(0):
        raise ValueError("segment_length must be a positive duration")

    while start_time < end_time:
        segment_end = min(start_time + segment_length, end_time)
        hydrophone_data = hydrophone_request.get_acoustic_data(
            start_time, segment_end, node, verbose=True
        )
        if hydrophone_data is None:
            logging.info(f"Could not get data from {start_time} to {segment_end}")
            start_time = segment_end
            continue
        # %f yields six digits (microseconds); dropping the last three keeps
        # millisecond precision in the file name.
        datestr = start_time.strftime("%Y-%m-%dT%H-%M-%S-%f")[:-3]
        wav_name = f"{datestr}.wav"
        try:
            hydrophone_data.wav_write(wav_name)
            spec_fname = create_spec_name(wav_name, output_dir)
            save_spectrogram(wav_name, spec_fname, nfft)
        finally:
            # Remove the temporary .wav even when writing or plotting fails,
            # without masking the original error if the file was never created.
            if os.path.exists(wav_name):
                os.remove(wav_name)
        start_time = segment_end
continue
datestr = start_time.strftime("%Y-%m-%dT%H-%M-%S-%f")[:-3]
wav_name = f"{datestr}.wav"
hydrophone_data.wav_write(wav_name)
save_spectrogram(wav_name)
os.remove(wav_name)
start_time = segment_end


if __name__ == "__main__":
    # Command-line entry point: parse the options, resolve the time range and
    # hand everything over to `save_ooi_spectrograms`.
    logging.basicConfig(
        format="%(levelname)s:%(message)s", stream=sys.stdout, level=logging.INFO
    )

    # Timestamp layout accepted by --start_time / --end_time.
    time_format = "%Y-%m-%dT%H-%M-%S"
    node_choices = [
        "LJ01D",
        "LJ01A",
        "PC01A",
        "PC03A",
        "LJ01C",
        "LJ03A",
        "AXABA1",
        "AXCC1",
        "AXEC2",
        "HYS14",
        "HYSB1",
    ]

    cli = argparse.ArgumentParser(description="Creates spectrogram for each segment.")
    cli.add_argument(
        "--node",
        help="Alphanumeric node id (e.g. PC01A)",
        default="PC01A",
        choices=node_choices,
    )
    cli.add_argument("-s", "--start_time", help="Start time formatted as Y-m-dTH-M-S")
    cli.add_argument("-e", "--end_time", help="End time formatted as Y-m-dTH-M-S")
    cli.add_argument(
        "-l",
        "--segment_length",
        type=float,
        default=5,
        help="Segment length in minutes. Default is %(default)s.",
    )
    cli.add_argument(
        "-o",
        "--output",
        default="spectrograms",
        help="Path to the output directory for spectrograms. Default is %(default)s.",
    )
    cli.add_argument(
        "-n",
        "--nfft",
        type=int,
        default=256,
        help="The number of data points used in each block for the FFT. A power 2 is most efficient. Default is %(default)s.",
    )
    options = cli.parse_args()

    # Without --end_time the range ends at midnight at the start of today.
    if options.end_time is not None:
        range_end = datetime.datetime.strptime(options.end_time, time_format)
    else:
        range_end = datetime.datetime.combine(
            datetime.datetime.today(), datetime.datetime.min.time()
        )

    # Without --start_time the range covers the day before the end time.
    if options.start_time is not None:
        range_start = datetime.datetime.strptime(options.start_time, time_format)
    else:
        range_start = range_end - datetime.timedelta(days=1)

    save_ooi_spectrograms(
        start_time=range_start,
        end_time=range_end,
        segment_length=datetime.timedelta(minutes=options.segment_length),
        node=options.node,
        output_dir=options.output,
        nfft=options.nfft,
    )
13 changes: 5 additions & 8 deletions orcasound_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import ffmpeg
import m3u8

from create_spectrogram import save_spectrogram
from create_spectrogram import create_spec_name, save_spectrogram


def convert_with_ffmpeg(input_file, output_file):
Expand Down Expand Up @@ -37,7 +37,8 @@ def create_readable_name(directory, timestamp):


def convert2wav(input_dir, output_dir):
"""Converts all `.ts` files from `live.m3u8` to `.wav`.
"""
Converts all `.ts` files from `live.m3u8` to `.wav`.

All files will have the following format: `%Y-%m-%dT%H-%M-%S.wav`

Expand Down Expand Up @@ -80,17 +81,13 @@ def convert2wav(input_dir, output_dir):
"-n",
"--nfft",
type=int,
help="The number of data points used in each block for the FFT. A power 2 is most efficient. Default is 256.",
default=256,
help="The number of data points used in each block for the FFT. A power 2 is most efficient. Default is %(default)s.",
)
args = parser.parse_args()

convert2wav(path.normpath(args.input_dir), "wav")

for input_wav in sorted(glob.glob("wav/*.wav")):
output_fname = None
if args.output is not None:
Path(args.output).mkdir(parents=True, exist_ok=True)
file_name = path.splitext(path.basename(input_wav))[0]
output_fname = f"{path.normpath(args.output)}/{file_name}"
output_fname = create_spec_name(input_wav, args.output)
save_spectrogram(input_wav, output_fname, args.nfft)
Binary file added tests/ooi.wav
Binary file not shown.
Binary file added tests/ooi_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/orcasound.wav
Binary file not shown.
Binary file added tests/orcasound_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
49 changes: 49 additions & 0 deletions tests/test_ooi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Unit tests for OOI workflow"""
import datetime
import os
import shutil
from os import path

import pytest

from ooi_processing import save_ooi_spectrograms


@pytest.mark.parametrize(
    "start_time_str, end_time_str, segment_length_float, node, output_dir, expected_files_count",
    [
        ("2017-03-10T00-00-00", "2017-03-10T00-05-00", None, None, None, 1),
        ("2017-03-10T00-00-00", "2017-03-10T00-05-00", 1, None, None, 5),
        ("2017-03-10T00-00-00", "2017-03-10T00-05-00", 1, None, "long/path/", 5),
        ("2017-03-10T00-05-00", "2017-03-10T00-00-00", 1, None, None, 0),
    ],
)
def test_ooi_spectrograms(
    start_time_str,
    end_time_str,
    segment_length_float,
    node,
    output_dir,
    expected_files_count,
):
    """Checks how many files `save_ooi_spectrograms` leaves in the output directory.

    A `None` parameter stands for the default used here: 5.0 minutes,
    node "PC01A" and output directory "spectrograms". When no files are
    expected (start after end) the output directory must not exist at all.
    """
    start_time = datetime.datetime.strptime(start_time_str, "%Y-%m-%dT%H-%M-%S")
    end_time = datetime.datetime.strptime(end_time_str, "%Y-%m-%dT%H-%M-%S")
    if segment_length_float is None:
        segment_length_float = 5.0
    if node is None:
        node = "PC01A"
    if output_dir is None:
        output_dir = "spectrograms"
    segment_length = datetime.timedelta(minutes=segment_length_float)
    try:
        save_ooi_spectrograms(start_time, end_time, segment_length, node, output_dir)
        if expected_files_count > 0:
            produced_files = [
                name
                for name in os.listdir(output_dir)
                if path.isfile(path.join(output_dir, name))
            ]
            assert expected_files_count == len(produced_files)
        else:
            assert not path.exists(output_dir)
    finally:
        # Always clean up, even on failure: several cases share the same
        # output directory, so leftovers would break the cases that follow.
        shutil.rmtree(output_dir, ignore_errors=True)
Loading