Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix sampling rate for reading opus files #158

Merged
merged 5 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions audiofile/core/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def convert(
outfile: str,
offset: float = 0,
duration: float = None,
sampling_rate: int = None,
):
"""Convert any audio/video file to WAV.

Expand All @@ -19,16 +20,17 @@ def convert(
outfile: WAV file name
duration: return only a specified duration in seconds
offset: start reading at offset in seconds
sampling_rate: target sampling rate in Hz of the converted file; if ``None``, no sampling rate option is passed to sox/ffmpeg

"""
try:
# Convert to WAV file with sox
run_sox(infile, outfile, offset, duration)
run_sox(infile, outfile, offset, duration, sampling_rate)
except (FileNotFoundError, subprocess.CalledProcessError):
try:
# Convert to WAV file with ffmpeg
run_ffmpeg(infile, outfile, offset, duration)
except FileNotFoundError:
raise binary_missing_error("ffmpeg")
except subprocess.CalledProcessError:
raise broken_file_error(infile)
run_ffmpeg(infile, outfile, offset, duration, sampling_rate)
except FileNotFoundError as e: # pragma: no cover
raise binary_missing_error("ffmpeg") from e
except subprocess.CalledProcessError as e: # pragma: no cover
raise broken_file_error(infile) from e
12 changes: 11 additions & 1 deletion audiofile/core/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ def read(

""" # noqa: E501
file = audeer.safe_path(file)
sampling_rate = None

# Parse offset and duration values
if (
Expand Down Expand Up @@ -384,7 +385,16 @@ def read(
offset /= sampling_rate
if duration is not None and duration != 0:
duration /= sampling_rate
convert(file, tmpfile, offset, duration)
if sampling_rate is None:
ChristianGeng marked this conversation as resolved.
Show resolved Hide resolved
# Infer sampling rate using mediainfo before conversion,
# as ffmpeg ignores the original sampling rate of opus files,
# see:
# * https://trac.ffmpeg.org/ticket/5240
# * https://github.com/audeering/audiofile/issues/157
from audiofile.core.info import sampling_rate as get_sampling_rate

sampling_rate = get_sampling_rate(file)
convert(file, tmpfile, offset, duration, sampling_rate)
signal, sampling_rate = soundfile.read(
tmpfile,
dtype=dtype,
Expand Down
20 changes: 12 additions & 8 deletions audiofile/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,19 +91,23 @@ def run(shell_command):
return ""


def run_ffmpeg(infile, outfile, offset, duration):
def run_ffmpeg(infile, outfile, offset, duration, sampling_rate):
"""Convert audio file to WAV file."""
cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, outfile]
if duration:
cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, "-t", str(duration), outfile]
else:
cmd = ["ffmpeg", "-ss", str(offset), "-i", infile, outfile]
cmd.insert(-1, "-t")
cmd.insert(-1, str(duration))
if sampling_rate:
cmd.insert(-1, "-ar")
cmd.insert(-1, str(sampling_rate))
run(cmd)


def run_sox(infile, outfile, offset, duration):
def run_sox(infile, outfile, offset, duration, sampling_rate):
"""Convert audio file to WAV file."""
cmd = ["sox", infile, outfile, "trim", str(offset)]
if duration:
cmd = ["sox", infile, outfile, "trim", str(offset), str(duration)]
else:
cmd = ["sox", infile, outfile, "trim", str(offset)]
cmd.append(str(duration))
if sampling_rate:
cmd += ["rate", str(sampling_rate)]
run(cmd)
7 changes: 7 additions & 0 deletions tests/assets/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ Kevin MacLeod (incompetech.com),
licensed under Creative Commons:
[CC-BY-3.0](http://creativecommons.org/licenses/by/3.0/).

We converted the file `gs-16b-1c-44100hz.opus`
ChristianGeng marked this conversation as resolved.
Show resolved Hide resolved
(which was wrongly stored with a sampling rate of 48000 Hz)
to `gs-16b-1c-16000hz.opus` using
```bash
ffmpeg -y -i gs-16b-1c-44100hz.opus -ac 1 -ar 16000 gs-16b-1c-16000hz.opus
```

## Video test files

The folder contains the video file `video.mp4`,
Expand Down
Binary file added tests/assets/gs-16b-1c-16000hz.opus
Binary file not shown.
Binary file removed tests/assets/gs-16b-1c-44100hz.opus
Binary file not shown.
16 changes: 8 additions & 8 deletions tests/test_audiofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,24 +202,24 @@ def test_empty_file(tmpdir, convert, empty_file):
def test_missing_binaries(tmpdir, hide_system_path, empty_file):
expected_error = FileNotFoundError
# Reading file
with pytest.raises(expected_error, match="ffmpeg"):
with pytest.raises(expected_error, match="mediainfo"):
signal, sampling_rate = af.read(empty_file)
# Metadata
with pytest.raises(expected_error, match="mediainfo"):
af.channels(empty_file)
with pytest.raises(expected_error, match="ffmpeg"):
with pytest.raises(expected_error, match="mediainfo"):
af.duration(empty_file)
with pytest.raises(expected_error, match="mediainfo"):
af.duration(empty_file, sloppy=True)
with pytest.raises(expected_error, match="mediainfo"):
af.has_video(empty_file)
with pytest.raises(expected_error, match="ffmpeg"):
with pytest.raises(expected_error, match="mediainfo"):
af.samples(empty_file)
with pytest.raises(expected_error, match="mediainfo"):
af.sampling_rate(empty_file)

# Convert
with pytest.raises(expected_error, match="ffmpeg"):
with pytest.raises(expected_error, match="mediainfo"):
converted_file = str(tmpdir.join("signal-converted.wav"))
af.convert_to_wav(empty_file, converted_file)

Expand Down Expand Up @@ -512,7 +512,7 @@ def test_file_type(tmpdir, file_type, magnitude, sampling_rate, channels):
@pytest.mark.parametrize(
"file, header_duration, audio, video", # header duration as given by mediainfo
[
("gs-16b-1c-44100hz.opus", 15.839, True, False),
("gs-16b-1c-16000hz.opus", 15.839, True, False),
("gs-16b-1c-8000hz.amr", 15.840000, True, False),
("gs-16b-1c-44100hz.m4a", 15.833, True, False),
("gs-16b-1c-44100hz.aac", None, True, False),
Expand Down Expand Up @@ -1243,7 +1243,7 @@ def test_read_duration_and_offset_rounding(
# when reading with sox or ffmpeg

# soundfile
signal, _ = af.read(audio_file, offset=offset, duration=duration)
signal, sampling_rate = af.read(audio_file, offset=offset, duration=duration)
np.testing.assert_allclose(
signal,
np.array(expected, dtype=np.float32),
Expand All @@ -1259,7 +1259,7 @@ def test_read_duration_and_offset_rounding(
# sox
convert_file = str(tmpdir.join("signal-sox.wav"))
try:
af.core.utils.run_sox(audio_file, convert_file, offset, duration)
af.core.utils.run_sox(audio_file, convert_file, offset, duration, sampling_rate)
signal, _ = af.read(convert_file)
np.testing.assert_allclose(
signal,
Expand All @@ -1272,7 +1272,7 @@ def test_read_duration_and_offset_rounding(

# ffmpeg
convert_file = str(tmpdir.join("signal-ffmpeg.wav"))
af.core.utils.run_ffmpeg(audio_file, convert_file, offset, duration)
af.core.utils.run_ffmpeg(audio_file, convert_file, offset, duration, sampling_rate)
signal, _ = af.read(convert_file)
np.testing.assert_allclose(
signal,
Expand Down
Loading