Skip to content

Commit

Permalink
Play generated audio using alsa for TTS (#482)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Dec 13, 2023
1 parent 9829d7c commit b18812c
Show file tree
Hide file tree
Showing 7 changed files with 465 additions and 10 deletions.
21 changes: 17 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)

set(SHERPA_ONNX_VERSION "1.9.3")
set(SHERPA_ONNX_VERSION "1.9.4")

# Disable warning about
#
Expand Down Expand Up @@ -106,10 +106,23 @@ endif()
set(CMAKE_CXX_EXTENSIONS OFF)
message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")


include(CheckIncludeFileCXX)
check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA)
if(SHERPA_ONNX_HAS_ALSA)
add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1)

# Probe for the ALSA development header only on non-Apple Unix platforms.
# SHERPA_ONNX_HAS_ALSA gates the ALSA-based executables; when the header is
# found, the C++ sources are additionally compiled with
# SHERPA_ONNX_ENABLE_ALSA=1.
if(UNIX AND NOT APPLE)
  check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA)
  if(SHERPA_ONNX_HAS_ALSA)
    add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1)
  else()
    # Not fatal: only the ALSA-based executables are skipped.
    message(WARNING "\
Could not find alsa/asoundlib.h !
We won't build sherpa-onnx-alsa and sherpa-onnx-offline-tts-play-alsa
To fix that, please do:
  (1) sudo apt-get install alsa-utils libasound2-dev
  (2) rm -rf build
  (3) re-try
  ")
  endif()
endif()

check_include_file_cxx(cxxabi.h SHERPA_ONNX_HAVE_CXXABI_H)
Expand Down
7 changes: 7 additions & 0 deletions cmake/cmake_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ def build_extension(self, ext: setuptools.extension.Extension):
binaries += ["sherpa-onnx-vad-microphone-offline-asr"]
binaries += ["sherpa-onnx-offline-tts"]
binaries += ["sherpa-onnx-offline-tts-play"]
binaries += ["sherpa-onnx-alsa"]
binaries += ["sherpa-onnx-offline-tts-play-alsa"]

if is_windows():
binaries += ["kaldi-native-fbank-core.dll"]
Expand All @@ -165,6 +167,11 @@ def build_extension(self, ext: setuptools.extension.Extension):
src_file = install_dir / "lib" / (f + suffix)
if not src_file.is_file():
src_file = install_dir / ".." / (f + suffix)

if not src_file.is_file() and 'alsa' in f:
print(f'Skipping {f}')
continue

print(f"Copying {src_file} to {out_bin_dir}/")
shutil.copy(f"{src_file}", f"{out_bin_dir}/")

Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def get_binaries_to_install():
binaries += ["sherpa-onnx-vad-microphone-offline-asr"]
binaries += ["sherpa-onnx-offline-tts"]
binaries += ["sherpa-onnx-offline-tts-play"]
binaries += ["sherpa-onnx-alsa"]
binaries += ["sherpa-onnx-offline-tts-play-alsa"]
if is_windows():
binaries += ["kaldi-native-fbank-core.dll"]
binaries += ["sherpa-onnx-c-api.dll"]
Expand Down
40 changes: 34 additions & 6 deletions sherpa-onnx/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -207,14 +207,42 @@ install(

if(SHERPA_ONNX_HAS_ALSA)
add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc)
target_link_libraries(sherpa-onnx-alsa sherpa-onnx-core)
add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc)

if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
target_link_libraries(sherpa-onnx-alsa -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
else()
target_link_libraries(sherpa-onnx-alsa asound)
# Executables that talk to ALSA directly. `exes` is also consumed by the
# install() rule that follows.
set(exes
  sherpa-onnx-alsa
  sherpa-onnx-offline-tts-play-alsa
)

# Configure every ALSA executable in a single pass. The order of the link
# items per target is: core library, libasound, then the rpath entries.
foreach(alsa_exe IN LISTS exes)
  target_link_libraries(${alsa_exe} sherpa-onnx-core)

  # SHERPA_ONNX_ALSA_LIB_DIR (read from the environment at configure time)
  # lets the user point the linker at a non-default libasound location.
  if(NOT DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
    target_link_libraries(${alsa_exe} asound)
  else()
    target_link_libraries(${alsa_exe} -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
  endif()

  if(NOT WIN32)
    # Let the executable locate the shared libraries relative to itself.
    target_link_libraries(${alsa_exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib")
    target_link_libraries(${alsa_exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib")

    if(SHERPA_ONNX_ENABLE_PYTHON)
      target_link_libraries(${alsa_exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib")
    endif()
  endif()
endforeach()
install(TARGETS sherpa-onnx-alsa DESTINATION bin)

# Install every ALSA executable listed in `exes` into <prefix>/bin.
install(TARGETS ${exes} DESTINATION bin)
endif()

if(SHERPA_ONNX_ENABLE_PORTAUDIO)
Expand Down
150 changes: 150 additions & 0 deletions sherpa-onnx/csrc/alsa-play.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// sherpa-onnx/csrc/alsa-play.cc
//
// Copyright (c) 2022-2023 Xiaomi Corporation

#ifdef SHERPA_ONNX_ENABLE_ALSA

#include "sherpa-onnx/csrc/alsa-play.h"

#include <algorithm>

namespace sherpa_onnx {

// Opens the PCM device `device_name` as a playback stream (mode flags 0) and
// then configures it for `sample_rate` via SetParameters().
//
// The process is terminated with exit(-1) if the device cannot be opened.
AlsaPlay::AlsaPlay(const char *device_name, int32_t sample_rate) {
  const int32_t ret =
      snd_pcm_open(&handle_, device_name, SND_PCM_STREAM_PLAYBACK, 0);

  if (ret != 0) {
    fprintf(stderr, "Unable to open: %s. %s\n", device_name, snd_strerror(ret));
    exit(-1);
  }

  SetParameters(sample_rate);
}

// Closes the PCM handle if one was opened. A failure to close is only
// reported; it is not fatal.
AlsaPlay::~AlsaPlay() {
  if (handle_ == nullptr) {
    return;
  }

  const int32_t ret = snd_pcm_close(handle_);
  if (ret < 0) {
    printf("Failed to close pcm: %s\n", snd_strerror(ret));
  }
}

void AlsaPlay::SetParameters(int32_t sample_rate) {
// set the following parameters
// 1. sample_rate
// 2. sample format: int16_t
// 3. num_channels: 1
snd_pcm_hw_params_t *params;
snd_pcm_hw_params_alloca(&params);
snd_pcm_hw_params_any(handle_, params);

int32_t err = snd_pcm_hw_params_set_access(handle_, params,
SND_PCM_ACCESS_RW_INTERLEAVED);
if (err < 0) {
printf("SND_PCM_ACCESS_RW_INTERLEAVED is not supported: %s\n",
snd_strerror(err));
exit(-1);
}

err = snd_pcm_hw_params_set_format(handle_, params, SND_PCM_FORMAT_S16_LE);

if (err < 0) {
printf("Can't set format to 16-bit: %s\n", snd_strerror(err));
exit(-1);
}

err = snd_pcm_hw_params_set_channels(handle_, params, 1);

if (err < 0) {
printf("Can't set channel number to 1: %s\n", snd_strerror(err));
}

uint32_t rate = sample_rate;
err = snd_pcm_hw_params_set_rate_near(handle_, params, &rate, 0);
if (err < 0) {
printf("Can't set rate to %d. %s\n", rate, snd_strerror(err));
}

err = snd_pcm_hw_params(handle_, params);
if (err < 0) {
printf("Can't set hardware parameters. %s\n", snd_strerror(err));
exit(-1);
}

uint32_t tmp;
snd_pcm_hw_params_get_rate(params, &tmp, 0);
int32_t actual_sample_rate = tmp;
if (actual_sample_rate != sample_rate) {
fprintf(stderr,
"Creating a resampler:\n"
" in_sample_rate: %d\n"
" output_sample_rate: %d\n",
sample_rate, actual_sample_rate);

float min_freq = std::min(actual_sample_rate, sample_rate);
float lowpass_cutoff = 0.99 * 0.5 * min_freq;

int32_t lowpass_filter_width = 6;
resampler_ = std::make_unique<LinearResample>(
sample_rate, actual_sample_rate, lowpass_cutoff, lowpass_filter_width);
}

snd_pcm_uframes_t frames;
snd_pcm_hw_params_get_period_size(params, &frames, 0);
buf_.resize(frames);
}

void AlsaPlay::Play(const std::vector<float> &samples) {
std::vector<float> tmp;
const float *p = samples.data();
int32_t num_samples = samples.size();
if (resampler_) {
resampler_->Resample(samples.data(), samples.size(), false, &tmp);
p = tmp.data();
num_samples = tmp.size();
}

int32_t frames = buf_.size();
int32_t i = 0;
for (; i + frames < num_samples; i += frames) {
for (int32_t k = 0; k != frames; ++k) {
buf_[k] = p[i + k] * 32767;
}

int32_t err = snd_pcm_writei(handle_, buf_.data(), frames);
if (err == -EPIPE) {
printf("XRUN.\n");
snd_pcm_prepare(handle_);
} else if (err < 0) {
printf("Can't write to PCM device: %s\n", snd_strerror(err));
exit(-1);
}
}

if (i < num_samples) {
for (int32_t k = 0; k + i < num_samples; ++k) {
buf_[k] = p[i + k] * 32767;
}

int32_t err = snd_pcm_writei(handle_, buf_.data(), num_samples - i);
if (err == -EPIPE) {
printf("XRUN.\n");
snd_pcm_prepare(handle_);
} else if (err < 0) {
printf("Can't write to PCM device: %s\n", snd_strerror(err));
exit(-1);
}
}
}

// Calls snd_pcm_drain() on the device so that pending samples get played.
// A failure is only reported; it is not fatal.
void AlsaPlay::Drain() {
  const int32_t ret = snd_pcm_drain(handle_);
  if (ret >= 0) {
    return;
  }

  printf("Failed to drain pcm. %s\n", snd_strerror(ret));
}

} // namespace sherpa_onnx

#endif // SHERPA_ONNX_ENABLE_ALSA
37 changes: 37 additions & 0 deletions sherpa-onnx/csrc/alsa-play.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// sherpa-onnx/csrc/alsa-play.h
//
// Copyright (c) 2022-2023 Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ALSA_PLAY_H_
#define SHERPA_ONNX_CSRC_ALSA_PLAY_H_

#include <cstdint>
#include <memory>
#include <vector>

#include "alsa/asoundlib.h"
#include "sherpa-onnx/csrc/resample.h"

namespace sherpa_onnx {

// Plays audio samples through an ALSA PCM playback device.
class AlsaPlay {
 public:
  // @param device_name  Name of the PCM device to open for playback.
  // @param sample_rate  Sample rate of the samples later passed to Play().
  AlsaPlay(const char *device_name, int32_t sample_rate);

  // Closes the PCM device if it was opened.
  ~AlsaPlay();

  // Sends `samples` to the device. They are resampled first when
  // resampler_ is set.
  void Play(const std::vector<float> &samples);

  // wait for all the samples to be played
  void Drain();

 private:
  // Applies the hardware parameters (access type, sample format, channel
  // count and sample rate) to handle_ and sizes buf_.
  void SetParameters(int32_t sample_rate);

  // PCM handle filled in by the constructor; nullptr until then.
  snd_pcm_t *handle_ = nullptr;

  // Only set when the device rate differs from the requested sample rate.
  std::unique_ptr<LinearResample> resampler_;

  // Staging buffer holding the int16_t samples handed to the device.
  std::vector<int16_t> buf_;
};

} // namespace sherpa_onnx

#endif // SHERPA_ONNX_CSRC_ALSA_PLAY_H_
Loading

0 comments on commit b18812c

Please sign in to comment.