diff --git a/README.md b/README.md index e99ab4b..577ae50 100644 --- a/README.md +++ b/README.md @@ -3,3 +3,7 @@ This simple package is a collection of useful utilities needed for streaming/downloading of HLS audio from Orcasound AWS buckets. `HLSStream` class is being used for continuous streaming and `DateRangeHLSStream` class is being used to get data in the specified time range. + +To install locally, use + +`pip install . ` diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..8ab824c --- /dev/null +++ b/setup.py @@ -0,0 +1,2 @@ +from setuptools import setup +setup() \ No newline at end of file diff --git a/src/orca_hls_utils/DateRangeHLSStream.py b/src/orca_hls_utils/DateRangeHLSStream.py index 8aa4221..7ff9f3f 100644 --- a/src/orca_hls_utils/DateRangeHLSStream.py +++ b/src/orca_hls_utils/DateRangeHLSStream.py @@ -1,31 +1,31 @@ - -from . import s3_utils -import boto3 -from botocore import UNSIGNED -from botocore.config import Config - -import m3u8 +# Native imports import math -from . import scraper -import ffmpeg import os from datetime import datetime -from datetime import timedelta -from pytz import timezone import time -import sys from pathlib import Path -from . import datetime_utils +from tempfile import TemporaryDirectory +import shutil + +# Required imports +import ffmpeg +import m3u8 +from pytz import timezone + +# Local imports +from . import s3_utils, datetime_utils, scraper + def get_readable_clipname(hydrophone_id, cliptime_utc): # cliptime is of the form 2020-09-27T00/16/55.677242Z cliptime_utc = timezone('UTC').localize(cliptime_utc) date = cliptime_utc.astimezone(timezone('US/Pacific')) - date_format='%Y_%m_%d_%H_%M_%S_%Z' + date_format = '%Y_%m_%d_%H_%M_%S_%Z' clipname = date.strftime(date_format) return hydrophone_id + "_" + clipname, date -#TODO: Handle date ranges that don't exist + +# TODO: Handle date ranges that don't exist class DateRangeHLSStream(): """ stream_base = 'https://s3-us-west-2.amazonaws.com/streaming-orcasound-net/rpi_orcasound_lab' @@ -52,6 +52,9 @@ def __init__(self, stream_base, polling_interval, start_unix_time, end_unix_time self.real_time = real_time self.is_end_of_stream = False + # Create wav dir if necessary + Path(self.wav_dir).mkdir(parents=True, exist_ok=True) + # query the stream base for all m3u8 files between the timestamps # split the stream base into bucket and folder @@ -64,7 +67,7 @@ def __init__(self, stream_base, polling_interval, start_unix_time, end_unix_time self.folder_name = tokens[1] prefix = self.folder_name + "/hls/" - # returns folder names corresponding to epochs, this grows as more data is added, we should probably maintain a list of + # returns folder names corresponding to epochs, this grows as more data is added, we should probably maintain a list of # hydrophone folders that exist all_hydrophone_folders = s3_utils.get_all_folders(self.s3_bucket, prefix=prefix) print("Found {} folders in all for hydrophone".format(len(all_hydrophone_folders))) @@ -75,7 +78,7 @@ def __init__(self, stream_base, polling_interval, start_unix_time, end_unix_time self.current_folder_index = 0 self.current_clip_start_time = self.start_unix_time - def get_next_clip(self, current_clip_name = None): + def get_next_clip(self, current_clip_name=None): """ """ @@ -96,7 +99,7 @@ def get_next_clip(self, current_clip_name = None): if time_to_sleep > 0: time.sleep(time_to_sleep) - + # read in current m3u8 file # stream_url for the current AWS folder stream_url = "{}/hls/{}/live.m3u8".format( @@ -121,36 +124,34 @@ def get_next_clip(self, current_clip_name = None): self.current_clip_start_time = datetime_utils.add_interval_to_unix_time(self.current_clip_start_time, self.polling_interval_in_seconds) # Create tmp path to hold .ts segments - tmp_path = "tmp_path" - os.makedirs(tmp_path,exist_ok=True) - - file_names = [] - for i in range(segment_start_index, segment_end_index): - audio_segment = stream_obj.segments[i] - base_path = audio_segment.base_uri - file_name = audio_segment.uri - audio_url = base_path + file_name - try: - scraper.download_from_url(audio_url,tmp_path) - file_names.append(file_name) - except Exception: - print("Skipping",audio_url,": error.") - - # concatentate all .ts files with ffmpeg - hls_file = (clipname+".ts") - audio_file = (clipname+".wav") - wav_file_path = os.path.join(self.wav_dir, audio_file) - filenames_str = " ".join(file_names) - concat_ts_cmd = "cd {tp} && cat {fstr} > {hls_file}".format(tp=tmp_path, fstr=filenames_str, hls_file=hls_file) - os.system(concat_ts_cmd) - - # read the concatenated .ts and write to wav - stream = ffmpeg.input(os.path.join(tmp_path, Path(hls_file))) - stream = ffmpeg.output(stream, wav_file_path) - ffmpeg.run(stream, overwrite_output=self.overwrite_output, quiet=False) - - # clear the tmp_path - os.system(f'rm -rf {tmp_path}') + with TemporaryDirectory() as tmp_path: + os.makedirs(tmp_path, exist_ok=True) + + file_names = [] + for i in range(segment_start_index, segment_end_index): + audio_segment = stream_obj.segments[i] + base_path = audio_segment.base_uri + file_name = audio_segment.uri + audio_url = base_path + file_name + try: + scraper.download_from_url(audio_url, tmp_path) + file_names.append(file_name) + except Exception: + print("Skipping", audio_url, ": error.") + + # concatentate all .ts files + hls_file = os.path.join(tmp_path, Path(clipname+".ts")) + with open(hls_file, 'wb') as wfd: + for f in file_names: + with open(os.path.join(tmp_path, f), 'rb') as fd: + shutil.copyfileobj(fd, wfd) + + # read the concatenated .ts and write to wav + audio_file = (clipname+".wav") + wav_file_path = os.path.join(self.wav_dir, audio_file) + stream = ffmpeg.input(os.path.join(tmp_path, Path(hls_file))) + stream = ffmpeg.output(stream, wav_file_path) + ffmpeg.run(stream, overwrite_output=self.overwrite_output, quiet=False) # If we're in demo mode, we need to fake timestamps to make it seem like the date range is real-time if current_clip_name: