Skip to content

Commit

Permalink
Merge pull request #46 from benfmiller/working
Browse files Browse the repository at this point in the history
Adding normalize field in BaseConfig
  • Loading branch information
benfmiller authored Feb 6, 2023
2 parents 564321a + 9b46ceb commit fe1a54a
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 25 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Change Log

## [1.2.2] 2023 - 02 - 05

### Added

- normalize option in BaseConfig; turns off all normalization if False

## [1.2.1] 2023 - 01 - 08

### Changed
Expand Down
35 changes: 29 additions & 6 deletions audalign/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@
from audalign.config import BaseConfig
from audalign.recognizers import BaseRecognizer
from audalign.recognizers.correcognize import CorrelationRecognizer
from audalign.recognizers.correcognizeSpectrogram import \
CorrelationSpectrogramRecognizer
from audalign.recognizers.correcognizeSpectrogram import (
CorrelationSpectrogramRecognizer,
)
from audalign.recognizers.fingerprint import FingerprintRecognizer
from audalign.recognizers.visrecognize import VisualRecognizer

Expand Down Expand Up @@ -224,11 +225,15 @@ def fine_align(
strength_stat=recognizer.config.CONFIDENCE,
)
paths_audio = filehandler.shift_get_files(
recalc_shifts_results, sample_rate=recognizer.config.sample_rate
recalc_shifts_results,
sample_rate=recognizer.config.sample_rate,
normalize=recognizer.config.normalize,
)
else:
paths_audio = filehandler.shift_get_files(
results, sample_rate=recognizer.config.sample_rate
results,
sample_rate=recognizer.config.sample_rate,
normalize=recognizer.config.normalize,
)

max_lags_not_set = False
Expand Down Expand Up @@ -265,6 +270,7 @@ def fine_align(
new_results["names_and_paths"],
write_extension,
write_multi_channel=write_multi_channel,
normalize=recognizer.config.normalize,
)
except PermissionError:
print("Permission Denied for write fine_align")
Expand All @@ -280,6 +286,7 @@ def write_processed_file(
destination_file: str,
start_end: tuple = None,
sample_rate: int = BaseConfig.sample_rate,
normalize: bool = BaseConfig.normalize,
) -> None:
"""
writes given file to the destination file after processing for fingerprinting
Expand All @@ -290,13 +297,14 @@ def write_processed_file(
destination_file (str): file path and name to write file to
start_end (tuple(float, float), optional): Silences before and after start and end. (0, -1) Silences last second, (5.4, 0) silences first 5.4 seconds
sample_rate (int): sample rate to write file to
normalize (bool): if true, normalizes file when read
"""
filehandler.read(
filename=file_path,
wrdestination=destination_file,
start_end=start_end,
sample_rate=sample_rate,
normalize=normalize,
)


Expand Down Expand Up @@ -549,6 +557,7 @@ def _write_shifted_files(
write_extension: str,
write_multi_channel: bool = False,
unprocessed: bool = False,
normalize: bool = BaseConfig.normalize,
):
"""
Writes files to destination_path with specified shift
Expand All @@ -560,6 +569,7 @@ def _write_shifted_files(
names_and_paths (dict{str}): dict with name as key and path as value
write_multi_channel (bool): If true, only write out combined file with each input audio file being one channel. If false, write out shifted files separately and total combined file
unprocessed (bool): If true, writes files without processing. For total files, only doesn't normalize
normalize (bool): if true, normalizes file when read
"""
filehandler.shift_write_files(
files_shifts,
Expand All @@ -568,6 +578,7 @@ def _write_shifted_files(
write_extension,
write_multi_channel=write_multi_channel,
unprocessed=unprocessed,
normalize=normalize,
)


Expand All @@ -592,6 +603,7 @@ def write_shifted_file(
destination_path: str,
offset_seconds: float,
unprocessed: bool = False,
normalize: bool = BaseConfig.normalize,
):
"""
Writes file to destination_path with specified shift in seconds
Expand All @@ -602,9 +614,14 @@ def write_shifted_file(
destination_path (str): where to write file to and file name
offset_seconds (float): how many seconds to shift, can't be negative
unprocessed (bool): If true, writes files without processing.
normalize (bool): if true, normalizes file when read
"""
filehandler.shift_write_file(
file_path, destination_path, offset_seconds, unprocessed=unprocessed
file_path,
destination_path,
offset_seconds,
unprocessed=unprocessed,
normalize=normalize,
)


Expand All @@ -615,6 +632,7 @@ def write_shifts_from_results(
write_extension: str = None,
write_multi_channel: bool = False,
unprocessed: bool = False,
normalize: bool = BaseConfig.normalize,
):
"""
For writing the results of an alignment with alternate source files or unprocessed files
Expand All @@ -633,6 +651,7 @@ def write_shifts_from_results(
write_extension (str, optional): if given, all files writen with given extension
write_multi_channel (bool): If true, only write out combined file with each input audio file being one channel. If false, write out shifted files separately and total combined file
unprocessed (bool): If true, writes files without processing. For total files, only doesn't normalize
normalize (bool): if true, normalizes file when read
"""
if isinstance(read_from_dir, str):
print("Finding audio files")
Expand Down Expand Up @@ -674,6 +693,7 @@ def write_shifts_from_results(
write_extension,
write_multi_channel=write_multi_channel,
unprocessed=unprocessed,
normalize=normalize,
)
except PermissionError:
print("Permission Denied for write fine_align")
Expand All @@ -684,6 +704,7 @@ def convert_audio_file(
destination_path: str,
start_end: tuple = None,
sample_rate: int = None,
normalize: bool = BaseConfig.normalize,
):
"""
Convert audio file to type specified in destination path
Expand All @@ -694,12 +715,14 @@ def convert_audio_file(
destination_path (str): where to write file to and file name
start_end (tuple(float, float), optional): Silences before and after start and end. (0, -1) Silences last second, (5.4, 0) silences first 5.4 seconds
sample_rate (int): sample rate to write file to
normalize (bool): if true, normalizes file when read
"""
filehandler.read(
filename=file_path,
wrdestination=destination_path,
start_end=start_end,
sample_rate=sample_rate,
normalize=normalize,
)


Expand Down
5 changes: 4 additions & 1 deletion audalign/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ class BaseConfig(ABC):
# Decodes audio file to this sample rate
sample_rate = 44100

# if true, normalizes all files when read
normalize = True

# keys in results dictionaries
CONFIDENCE = "confidence"
MATCH_TIME = "match_time"
Expand Down Expand Up @@ -64,4 +67,4 @@ class BaseConfig(ABC):

# used if rankings_get_top_num_match is not None. (used in visual)
# subtracts second value from ranking if num matches is above first value
rankings_num_matches_tups: typing.Optional[tuple] = None
rankings_num_matches_tups: typing.Optional[tuple] = None
50 changes: 39 additions & 11 deletions audalign/filehandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def create_audiosegment(
sample_rate=BaseConfig.sample_rate,
length=None,
unprocessed=False,
normalize=BaseConfig.normalize,
) -> AudioSegment:
if sample_rate is None:
sample_rate = BaseConfig.sample_rate
Expand All @@ -73,7 +74,8 @@ def create_audiosegment(
audiofile = audiofile.set_frame_rate(sample_rate)
audiofile = audiofile.set_sample_width(2)
audiofile = audiofile.set_channels(1)
audiofile = effects.normalize(audiofile)
if normalize:
audiofile = effects.normalize(audiofile)
else:
sample_rate = audiofile.frame_rate
if start_end is not None:
Expand Down Expand Up @@ -150,6 +152,7 @@ def read(
wrdestination=None,
start_end: tuple = None,
sample_rate=BaseConfig.sample_rate,
normalize: bool = BaseConfig.normalize,
):
"""
Reads any file supported by pydub (ffmpeg) and returns a numpy array and the bit depth
Expand All @@ -167,7 +170,7 @@ def read(
if os.path.splitext(filename)[1] in cant_read_ext:
raise CouldntDecodeError
audiofile = create_audiosegment(
filename, start_end=start_end, sample_rate=sample_rate
filename, start_end=start_end, sample_rate=sample_rate, normalize=normalize
)
data = np.frombuffer(audiofile._data, np.int16)
if wrdestination:
Expand Down Expand Up @@ -524,7 +527,9 @@ def level_by_ave(audiofile_data, index_list, overlap_array, width, exclude_min_d
return new_audio_data


def shift_get_files(results: dict, sample_rate: int = None):
def shift_get_files(
results: dict, sample_rate: int = None, normalize: bool = BaseConfig.normalize
):
names_and_paths = results.pop("names_and_paths")
temp_a = results.pop("match_info")
temp_rankings = None
Expand All @@ -538,6 +543,7 @@ def shift_get_files(results: dict, sample_rate: int = None):
None,
sample_rate=sample_rate,
return_files=True,
normalize=normalize,
)
results["names_and_paths"] = names_and_paths
results["match_info"] = temp_a
Expand All @@ -553,6 +559,7 @@ def shift_write_files(
write_extension: str,
write_multi_channel: bool = False,
unprocessed: bool = False,
normalize: bool = BaseConfig.normalize,
):
"""
Args
Expand All @@ -563,6 +570,7 @@ def shift_write_files(
write_extension (str): if given, writes all alignments with given extension (ex. ".wav" or "wav")
write_multi_channel (bool): If true, only write out combined file with each input audio file being one channel. If false, write out shifted files separately and total combined file
unprocessed (bool): If true, writes files without processing
normalize (bool): if true, normalizes file when read
"""
_shift_files(
files_shifts,
Expand All @@ -572,6 +580,7 @@ def shift_write_files(
write_multi_channel=write_multi_channel,
return_files=False,
unprocessed=unprocessed,
normalize=normalize,
)


Expand All @@ -584,6 +593,7 @@ def _shift_files(
sample_rate: int = None,
return_files: bool = False,
unprocessed: bool = False,
normalize: bool = BaseConfig.normalize,
):
if sample_rate is None:
sample_rate = BaseConfig.sample_rate
Expand All @@ -601,6 +611,7 @@ def _shift_files(
sample_rate=sample_rate,
return_files=return_files,
unprocessed=unprocessed,
normalize=normalize,
)
else:
return _shift_write_multichannel(
Expand All @@ -611,6 +622,7 @@ def _shift_files(
sample_rate=sample_rate,
return_files=return_files,
unprocessed=unprocessed,
normalize=normalize,
)


Expand All @@ -622,13 +634,15 @@ def _shift_write_separate(
sample_rate: int = None,
return_files: bool = False,
unprocessed: bool = False,
normalize: bool = BaseConfig.normalize,
):
audsegs = _shift_prepend_space_audsegs(
files_shifts=files_shifts,
names_and_paths=names_and_paths,
sample_rate=sample_rate,
return_files=return_files,
unprocessed=unprocessed,
normalize=normalize,
)
if return_files:
return audsegs
Expand Down Expand Up @@ -690,6 +704,7 @@ def _shift_prepend_space_audsegs(
sample_rate: int,
return_files: bool = False,
unprocessed: bool = False,
normalize: bool = BaseConfig.normalize,
):
audsegs = {}
for name in files_shifts.keys():
Expand All @@ -698,16 +713,16 @@ def _shift_prepend_space_audsegs(
audsegs[file_path] = files_shifts[name]
else:
audiofile = create_audiosegment(
file_path, sample_rate=sample_rate, unprocessed=unprocessed
file_path,
sample_rate=sample_rate,
unprocessed=unprocessed,
normalize=normalize,
)
if unprocessed:
sample_rate = audiofile.frame_rate
silence = AudioSegment.silent(
(files_shifts[name]) * 1000, frame_rate=sample_rate
)
audiofile = create_audiosegment(
file_path, sample_rate=sample_rate, unprocessed=unprocessed
)
audiofile: AudioSegment = silence + audiofile
audsegs[file_path] = audiofile
return audsegs
Expand Down Expand Up @@ -753,13 +768,15 @@ def _shift_write_multichannel(
sample_rate: int,
return_files: bool = False,
unprocessed: bool = False,
normalize: bool = BaseConfig.normalize,
):
audsegs = _shift_prepend_space_audsegs(
files_shifts=files_shifts,
names_and_paths=names_and_paths,
sample_rate=sample_rate,
return_files=return_files,
unprocessed=unprocessed,
normalize=normalize,
)
if return_files:
return audsegs
Expand Down Expand Up @@ -797,9 +814,15 @@ def _shift_write_multichannel(


def shift_write_file(
file_path, destination_path, offset_seconds, unprocessed: bool = False
file_path,
destination_path,
offset_seconds,
unprocessed: bool = False,
normalize: bool = BaseConfig.normalize,
):
audiofile = create_audiosegment(file_path, unprocessed=unprocessed)
audiofile = create_audiosegment(
file_path, unprocessed=unprocessed, normalize=normalize
)
sample_rate: int = audiofile.frame_rate
silence = AudioSegment.silent(offset_seconds * 1000, frame_rate=sample_rate)
audiofile = silence + audiofile
Expand All @@ -808,10 +831,15 @@ def shift_write_file(


def get_shifted_file(
file_path, offset_seconds, sample_rate=BaseConfig.sample_rate
file_path,
offset_seconds,
sample_rate=BaseConfig.sample_rate,
normalize: bool = BaseConfig.normalize,
) -> np.array:
silence = AudioSegment.silent(offset_seconds * 1000, frame_rate=sample_rate)

audiofile = create_audiosegment(file_path, sample_rate=sample_rate)
audiofile = create_audiosegment(
file_path, sample_rate=sample_rate, normalize=normalize
)
audiofile = silence + audiofile
return np.frombuffer(audiofile._data, np.int16)
Loading

0 comments on commit fe1a54a

Please sign in to comment.