diff --git a/conch/analysis/autovot.py b/conch/analysis/autovot.py
new file mode 100644
index 0000000..397e0c0
--- /dev/null
+++ b/conch/analysis/autovot.py
@@ -0,0 +1,98 @@
+from .functions import BaseAnalysisFunction
+import subprocess
+import textgrid
+import os
+import tempfile
+
+
+def is_autovot_friendly_file(sound_file):
+    """Check whether a file is already mono 16 kHz, the format AutoVOT expects."""
+    rate = subprocess.run(["soxi", "-r", sound_file], encoding="UTF-8", stdout=subprocess.PIPE).stdout
+    if int(rate) != 16000:
+        return False
+    channels = subprocess.run(["soxi", "-c", sound_file], encoding="UTF-8", stdout=subprocess.PIPE).stdout
+    if int(channels) != 1:
+        return False
+    return True
+
+
+def resample_for_autovot(soundfile, tmpdir):
+    """Convert a sound file to mono 16 kHz so AutoVOT can process it."""
+    output_file = os.path.join(tmpdir, "sound_file.wav")
+    subprocess.call(["sox", soundfile, "-c", "1", "-r", "16000", output_file])
+    return output_file
+
+
+class MeasureVOTPretrained(object):
+    def __init__(self, classifier_to_use=None, min_vot_length=15, max_vot_length=250, window_max=30, window_min=30, debug=False):
+        if classifier_to_use is None:
+            raise ValueError("There must be a classifier to run AutoVOT")
+        self.classifier_to_use = classifier_to_use
+        self.min_vot_length = min_vot_length
+        self.max_vot_length = max_vot_length
+        self.debug = debug
+        self.window_max = window_max
+        self.window_min = window_min
+
+    def __call__(self, segment):
+        file_path = os.path.expanduser(segment["file_path"])
+        end = segment["end"]
+        vot_marks = sorted(segment["vot_marks"], key=lambda x: x[0])
+        # Build a TextGrid with one 'vot' interval per candidate window
+        grid = textgrid.TextGrid(maxTime=end)
+        vot_tier = textgrid.IntervalTier(name='vot', maxTime=end)
+        for vot_begin, vot_end, *extra_data in vot_marks:
+            vot_tier.add(vot_begin, vot_end, 'vot')
+        grid.append(vot_tier)
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            grid_path = os.path.join(tmpdirname, "file.TextGrid")
+            csv_path = os.path.join(tmpdirname, "file.csv")
+            wav_filenames = os.path.join(tmpdirname, "wavs.txt")
+            textgrid_filenames = os.path.join(tmpdirname, "textgrids.txt")
+
+            # AutoVOT only accepts mono 16 kHz wavs; resample if needed
+            if not is_autovot_friendly_file(file_path):
+                file_path = resample_for_autovot(file_path, tmpdirname)
+
+            with open(wav_filenames, 'w') as f:
+                f.write("{}\n".format(file_path))
+
+            with open(textgrid_filenames, 'w') as f:
+                f.write("{}\n".format(grid_path))
+
+            grid.write(grid_path)
+
+            if self.debug:
+                # Keep a copy of the generated TextGrid around for inspection
+                grid.write('/tmp/textgrid_from_conch.TextGrid')
+                with open('/tmp/alt_wordlist.txt', 'w') as f:
+                    f.write("{}\n".format('/tmp/textgrid_from_conch.TextGrid'))
+                subprocess.run(["auto_vot_decode.py", wav_filenames, '/tmp/alt_wordlist.txt', self.classifier_to_use,
+                                '--vot_tier', 'vot', '--vot_mark', 'vot',
+                                "--min_vot_length", str(self.min_vot_length), "--max_vot_length", str(self.max_vot_length),
+                                "--window_max", str(self.window_max), "--window_min", str(self.window_min)])
+            subprocess.run(["auto_vot_decode.py", wav_filenames, textgrid_filenames, self.classifier_to_use,
+                            '--vot_tier', 'vot', '--vot_mark', 'vot', '--csv_file', csv_path,
+                            "--min_vot_length", str(self.min_vot_length), "--max_vot_length", str(self.max_vot_length),
+                            "--window_max", str(self.window_max), "--window_min", str(self.window_min)])
+
+            return_list = []
+            with open(csv_path, "r") as f:
+                f.readline()  # skip the CSV header
+                for l, (b, e, *extra_data) in zip(f, vot_marks):
+                    _, time, vot, confidence = l.split(',')
+                    if "neg 0\n" == confidence:
+                        confidence = 0
+                    return_list.append((float(time), float(vot), float(confidence), *extra_data))
+        return return_list
+
+
+class AutoVOTAnalysisFunction(BaseAnalysisFunction):
+    def __init__(self, classifier_to_use=None, min_vot_length=15, max_vot_length=250, window_max=30, window_min=30, debug=False, arguments=None):
+        super(AutoVOTAnalysisFunction, self).__init__()
+        self._function = MeasureVOTPretrained(classifier_to_use=classifier_to_use, min_vot_length=min_vot_length,
+                                              max_vot_length=max_vot_length, window_max=window_max,
+                                              window_min=window_min, debug=debug)
+        self.requires_file = True
+        self.uses_segments = True
+        self.requires_segment_as_arg = True
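For orientation, a minimal usage sketch of the new analysis function (not part of the diff). The wav and classifier paths here are hypothetical, and it assumes AutoVOT's auto_vot_decode.py plus sox/soxi are on the PATH; the call pattern mirrors the test added below.

    # Minimal usage sketch (hypothetical paths; assumes autovot and sox are installed)
    from conch import analyze_segments
    from conch.analysis.autovot import AutoVOTAnalysisFunction
    from conch.analysis.segments import SegmentMapping

    mapping = SegmentMapping()
    # vot_marks are (begin, end) candidate windows in seconds
    mapping.add_file_segment('/path/to/speech.wav', 0, 30.0, channel=0,
                             vot_marks=[(1.50, 1.66), (1.86, 1.91)])
    func = AutoVOTAnalysisFunction(classifier_to_use='/path/to/model.classifier',
                                   min_vot_length=5, max_vot_length=100)
    results = analyze_segments(mapping, func, multiprocessing=False)
    # Each segment maps to a list of (time, vot_duration, confidence) tuples
    for time, vot, confidence in results[mapping[0]]:
        print(time, vot, confidence)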
diff --git a/conch/analysis/functions.py b/conch/analysis/functions.py
index c5840b2..6701974 100644
--- a/conch/analysis/functions.py
+++ b/conch/analysis/functions.py
@@ -15,6 +15,7 @@ class BaseAnalysisFunction(object):
     def __init__(self):
         self._function = print
         self.requires_file = False
+        self.requires_segment_as_arg = False
         self.uses_segments = False
         self.arguments = []
 
@@ -39,6 +40,8 @@ def __call__(self, segment):
         elif isinstance(segment, str) and not self.requires_file:
             signal, sr = librosa.load(safe_path(segment))
             return self._function(signal, sr, *self.arguments)
+        elif isinstance(segment, FileSegment) and self.requires_segment_as_arg:
+            return self._function(segment, *self.arguments)
         elif isinstance(segment, FileSegment) and self.requires_file and not self.uses_segments:
             beg, end = segment.begin, segment.end
             padding = segment['padding']
diff --git a/conch/analysis/segments.py b/conch/analysis/segments.py
index 32d26b4..b90ea9b 100644
--- a/conch/analysis/segments.py
+++ b/conch/analysis/segments.py
@@ -44,6 +44,8 @@ def __eq__(self, other):
             return False
         if self.channel != other.channel:
             return False
+        if self.properties != other.properties:
+            return False
         return True
 
     def __lt__(self, other):
diff --git a/tests/conftest.py b/tests/conftest.py
index 73756e3..98e68ff 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -37,6 +37,9 @@ def praat_script_test_dir(test_dir):
 def soundfiles_dir(test_dir):
     return os.path.join(test_dir, 'soundfiles')
 
+@pytest.fixture(scope='session')
+def autovot_dir(test_dir):
+    return os.path.join(test_dir, 'autovot')
 
 @pytest.fixture(scope='session')
 def tts_dir(test_dir):
@@ -62,7 +65,6 @@ def y_path(soundfiles_dir):
 def acoustic_corpus_path(soundfiles_dir):
     return os.path.join(soundfiles_dir, 'acoustic_corpus.wav')
 
-
 @pytest.fixture(scope='session')
 def call_back():
     def function(*args):
@@ -87,6 +89,22 @@ def base_filenames(soundfiles_dir):
                  if x.endswith('.wav')]
     return filenames
 
+@pytest.fixture(scope='session')
+def autovot_markings(test_dir):
+    vot_markings = []
+    with open(os.path.join(test_dir, "vot_marks"), "r") as f:
+        for x in f:
+            vots = x.split(' ')
+            vot_markings.append((float(vots[0]), float(vots[1])))
+    return vot_markings
+
+@pytest.fixture(scope='session')
+def classifier_path(test_dir):
+    return os.path.join(test_dir, "vot_model", "sotc_voiceless.classifier")
+
+@pytest.fixture(scope='session')
+def autovot_correct_times():
+    return [(1.593, 0.056, 180.344), (1.828, 0.008, 126.073), (1.909, 0.071, 90.8671), (2.041, 0.005, 45.6481), (2.687, 0.016, 212.67), (2.859, 0.005, 22.646), (2.951, 0.005, 78.2495), (3.351, 0.052, 84.7406), (5.574, 0.02, 96.0191), (6.212, 0.01, 72.1773), (6.736, 0.02, 114.721), (7.02, 0.029, 224.901), (9.255, 0.032, 123.367), (9.498, 0.017, 92.7151), (11.424, 0.056, 85.1062), (13.144, 0.012, 191.111), (13.55, 0.012, 59.8446), (25.125, 0.014, 165.632)]
 
 @pytest.fixture(scope='session')
 def praatpath():
@@ -114,7 +132,6 @@ def formants_func():
                               window_length=0.025)
     return func
 
-
 @pytest.fixture(scope='session')
 def pitch_func():
     func = PitchTrackFunction(min_pitch=50, max_pitch=500, time_step=0.01)
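As an aside, the new requires_segment_as_arg flag added to BaseAnalysisFunction above routes the whole FileSegment (including custom properties such as vot_marks) to the wrapped callable, instead of a loaded (signal, sr) pair. A hedged sketch of a custom subclass using this hook, with hypothetical names, mirroring the flag combination AutoVOTAnalysisFunction sets:

    # Hypothetical example; DurationFunction/segment_duration are illustrative names
    from conch.analysis.functions import BaseAnalysisFunction

    def segment_duration(segment):
        # Receives the FileSegment itself rather than (signal, sr)
        return segment['end'] - segment['begin']

    class DurationFunction(BaseAnalysisFunction):
        def __init__(self):
            super(DurationFunction, self).__init__()
            self._function = segment_duration
            # Same flags as AutoVOTAnalysisFunction above
            self.requires_file = True
            self.uses_segments = True
            self.requires_segment_as_arg = True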
diff --git a/tests/data/vot_marks b/tests/data/vot_marks
new file mode 100644
index 0000000..cf90c99
--- /dev/null
+++ b/tests/data/vot_marks
@@ -0,0 +1,18 @@
+1.50346 1.65870
+1.85687 1.90566
+1.90566 1.98664
+2.06736 2.14425
+2.65964 2.70424
+2.78255 2.85937
+2.93352 2.95891
+3.32791 3.38926
+5.53054 5.60142
+6.18190 6.23417
+6.76380 6.79755
+6.89678 7.06540
+9.21705 9.32077
+9.44277 9.50559
+11.44430 11.50088
+13.10701 13.22000
+13.51863 13.55111
+25.09728 25.14633
diff --git a/tests/data/vot_model/sotc_voiceless.classifier.neg b/tests/data/vot_model/sotc_voiceless.classifier.neg
new file mode 100644
index 0000000..c8f9519
--- /dev/null
+++ b/tests/data/vot_model/sotc_voiceless.classifier.neg
@@ -0,0 +1 @@
+59 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
\ No newline at end of file
diff --git a/tests/data/vot_model/sotc_voiceless.classifier.pos b/tests/data/vot_model/sotc_voiceless.classifier.pos
new file mode 100644
index 0000000..f2713f9
--- /dev/null
+++ b/tests/data/vot_model/sotc_voiceless.classifier.pos
@@ -0,0 +1 @@
+77 -5.19517 0.974839 8.37886 4.43399 -0.291111 -1.03706 -0.851605 -2.99899 -0.988911 0.238412 -6.02394 7.69937 1.91178 12.1159 17.234 3.06435 -4.58158 -6.52092 0.704001 -9.8874 -4.83043 -1.23498 4.27573 -2.88435 -4.91017 -3.34528 -1.34791 11.5056 28.1743 1.4589 0.965129 -11.9287 0.431235 0.497913 -4.02346 -7.29419 -1.02066 12.9648 0.510423 0.892902 -2.05145 -1.73262 -8.13197 14.6691 5.57997 0.225761 5.1901 -7.44362 21.9976 0.278337 1.93388 -36.4286 10.1835 -3.78564 -4.12507 -17.2654 -1.25228 -18.5517 -51.1295 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
\ No newline at end of file
diff --git a/tests/test_analysis_autovot.py b/tests/test_analysis_autovot.py
new file mode 100644
index 0000000..4e97c07
--- /dev/null
+++ b/tests/test_analysis_autovot.py
@@ -0,0 +1,16 @@
+from conch.analysis.autovot import AutoVOTAnalysisFunction
+import wave
+from conch.analysis.segments import SegmentMapping
+from conch import analyze_segments
+
+
+def test_autovot(acoustic_corpus_path, autovot_markings, classifier_path, autovot_correct_times):
+    mapping = SegmentMapping()
+    with wave.open(acoustic_corpus_path, 'r') as f:
+        length = f.getnframes() / float(f.getframerate())
+    mapping.add_file_segment(acoustic_corpus_path, 0, length, channel=0, vot_marks=autovot_markings)
+    func = AutoVOTAnalysisFunction(classifier_to_use=classifier_path, window_min=-30, window_max=30, min_vot_length=5, max_vot_length=100)
+    output = analyze_segments(mapping, func, multiprocessing=False)
+    output = output[mapping[0]]
+    for o, truth in zip(output, autovot_correct_times):
+        assert o == truth
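The test compares floats with exact equality, which is fine for a pinned classifier but could prove brittle across AutoVOT builds. A possible alternative assertion, sketched with pytest.approx (not what the diff does):

    # Sketch of a tolerance-based comparison (not in the diff)
    import pytest

    for o, truth in zip(output, autovot_correct_times):
        for measured, expected in zip(o, truth):
            assert measured == pytest.approx(expected, abs=1e-3)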