diff --git a/audiofile/core/convert.py b/audiofile/core/convert.py index dde7873..2fabb09 100644 --- a/audiofile/core/convert.py +++ b/audiofile/core/convert.py @@ -11,6 +11,7 @@ def convert( outfile: str, offset: float = 0, duration: float = None, + sampling_rate: int = None, ): """Convert any audio/video file to WAV. @@ -19,16 +20,17 @@ def convert( outfile: WAV file name duration: return only a specified duration in seconds offset: start reading at offset in seconds + sampling_rate: sampling rate in Hz """ try: # Convert to WAV file with sox - run_sox(infile, outfile, offset, duration) + run_sox(infile, outfile, offset, duration, sampling_rate) except (FileNotFoundError, subprocess.CalledProcessError): try: # Convert to WAV file with ffmpeg - run_ffmpeg(infile, outfile, offset, duration) - except FileNotFoundError: - raise binary_missing_error("ffmpeg") - except subprocess.CalledProcessError: - raise broken_file_error(infile) + run_ffmpeg(infile, outfile, offset, duration, sampling_rate) + except FileNotFoundError as e: # pragma: no cover + raise binary_missing_error("ffmpeg") from e + except subprocess.CalledProcessError as e: # pragma: no cover + raise broken_file_error(infile) from e diff --git a/audiofile/core/io.py b/audiofile/core/io.py index f7ad518..72ea0f5 100644 --- a/audiofile/core/io.py +++ b/audiofile/core/io.py @@ -254,6 +254,7 @@ def read( """ # noqa: E501 file = audeer.safe_path(file) + sampling_rate = None # Parse offset and duration values if ( @@ -384,7 +385,16 @@ def read( offset /= sampling_rate if duration is not None and duration != 0: duration /= sampling_rate - convert(file, tmpfile, offset, duration) + if sampling_rate is None: + # Infer sampling rate using mediainfo before conversion, + # as ffmpeg does ignore the original sampling rate for opus files, + # see: + # * https://trac.ffmpeg.org/ticket/5240 + # * https://github.com/audeering/audiofile/issues/157 + from audiofile.core.info import sampling_rate as get_sampling_rate + + 
sampling_rate = get_sampling_rate(file) + convert(file, tmpfile, offset, duration, sampling_rate) signal, sampling_rate = soundfile.read( tmpfile, dtype=dtype, diff --git a/audiofile/core/utils.py b/audiofile/core/utils.py index 9f037ce..ff2ea52 100644 --- a/audiofile/core/utils.py +++ b/audiofile/core/utils.py @@ -91,19 +91,23 @@ def run(shell_command): return "" -def run_ffmpeg(infile, outfile, offset, duration): +def run_ffmpeg(infile, outfile, offset, duration, sampling_rate): """Convert audio file to WAV file.""" + cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, outfile] if duration: - cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, "-t", str(duration), outfile] - else: - cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, outfile] + cmd.insert(-1, "-t") + cmd.insert(-1, str(duration)) + if sampling_rate: + cmd.insert(-1, "-ar") + cmd.insert(-1, str(sampling_rate)) run(cmd) -def run_sox(infile, outfile, offset, duration): +def run_sox(infile, outfile, offset, duration, sampling_rate): """Convert audio file to WAV file.""" + cmd = ["sox", infile, outfile, "trim", str(offset)] if duration: - cmd = ["sox", infile, outfile, "trim", str(offset), str(duration)] - else: - cmd = ["sox", infile, outfile, "trim", str(offset)] + cmd.append(str(duration)) + if sampling_rate: + cmd += ["rate", str(sampling_rate)] run(cmd) diff --git a/tests/assets/README.md b/tests/assets/README.md index dd40765..e66db7a 100644 --- a/tests/assets/README.md +++ b/tests/assets/README.md @@ -15,6 +15,13 @@ Kevin MacLeod (incompetech.com), licensed under Creative Commons: [CC-BY-3.0](http://creativecommons.org/licenses/by/3.0/). 
+We converted the file `gs-16b-1c-44100hz.opus` +(which was stored wrongly with 48000 Hz) +to `gs-16b-1c-16000hz.opus` using +```bash +ffmpeg -y -i gs-16b-1c-44100hz.opus -ac 1 -ar 16000 gs-16b-1c-16000hz.opus +``` + ## Video test files The folder contains the video file `video.mp4`, diff --git a/tests/assets/gs-16b-1c-16000hz.opus b/tests/assets/gs-16b-1c-16000hz.opus new file mode 100644 index 0000000..2016986 Binary files /dev/null and b/tests/assets/gs-16b-1c-16000hz.opus differ diff --git a/tests/assets/gs-16b-1c-44100hz.opus b/tests/assets/gs-16b-1c-44100hz.opus deleted file mode 100644 index 40a1e11..0000000 Binary files a/tests/assets/gs-16b-1c-44100hz.opus and /dev/null differ diff --git a/tests/test_audiofile.py b/tests/test_audiofile.py index 9863663..50f59e0 100644 --- a/tests/test_audiofile.py +++ b/tests/test_audiofile.py @@ -202,24 +202,24 @@ def test_empty_file(tmpdir, convert, empty_file): def test_missing_binaries(tmpdir, hide_system_path, empty_file): expected_error = FileNotFoundError # Reading file - with pytest.raises(expected_error, match="ffmpeg"): + with pytest.raises(expected_error, match="mediainfo"): signal, sampling_rate = af.read(empty_file) # Metadata with pytest.raises(expected_error, match="mediainfo"): af.channels(empty_file) - with pytest.raises(expected_error, match="ffmpeg"): + with pytest.raises(expected_error, match="mediainfo"): af.duration(empty_file) with pytest.raises(expected_error, match="mediainfo"): af.duration(empty_file, sloppy=True) with pytest.raises(expected_error, match="mediainfo"): af.has_video(empty_file) - with pytest.raises(expected_error, match="ffmpeg"): + with pytest.raises(expected_error, match="mediainfo"): af.samples(empty_file) with pytest.raises(expected_error, match="mediainfo"): af.sampling_rate(empty_file) # Convert - with pytest.raises(expected_error, match="ffmpeg"): + with pytest.raises(expected_error, match="mediainfo"): converted_file = str(tmpdir.join("signal-converted.wav")) 
af.convert_to_wav(empty_file, converted_file) @@ -512,7 +512,7 @@ def test_file_type(tmpdir, file_type, magnitude, sampling_rate, channels): @pytest.mark.parametrize( "file, header_duration, audio, video", # header duration as given by mediainfo [ - ("gs-16b-1c-44100hz.opus", 15.839, True, False), + ("gs-16b-1c-16000hz.opus", 15.839, True, False), ("gs-16b-1c-8000hz.amr", 15.840000, True, False), ("gs-16b-1c-44100hz.m4a", 15.833, True, False), ("gs-16b-1c-44100hz.aac", None, True, False), @@ -1243,7 +1243,7 @@ def test_read_duration_and_offset_rounding( # when reading with sox or ffmpeg # soundfile - signal, _ = af.read(audio_file, offset=offset, duration=duration) + signal, sampling_rate = af.read(audio_file, offset=offset, duration=duration) np.testing.assert_allclose( signal, np.array(expected, dtype=np.float32), @@ -1259,7 +1259,7 @@ def test_read_duration_and_offset_rounding( # sox convert_file = str(tmpdir.join("signal-sox.wav")) try: - af.core.utils.run_sox(audio_file, convert_file, offset, duration) + af.core.utils.run_sox(audio_file, convert_file, offset, duration, sampling_rate) signal, _ = af.read(convert_file) np.testing.assert_allclose( signal, @@ -1272,7 +1272,7 @@ def test_read_duration_and_offset_rounding( # ffmpeg convert_file = str(tmpdir.join("signal-ffmpeg.wav")) - af.core.utils.run_ffmpeg(audio_file, convert_file, offset, duration) + af.core.utils.run_ffmpeg(audio_file, convert_file, offset, duration, sampling_rate) signal, _ = af.read(convert_file) np.testing.assert_allclose( signal,