Play generated audio using alsa for TTS (#482)

k2-fsa · Dec 13, 2023 · b18812c · b18812c
1 parent 9829d7c
commit b18812c
Show file tree

Hide file tree

Showing 7 changed files with 465 additions and 10 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
 project(sherpa-onnx)
 
-set(SHERPA_ONNX_VERSION "1.9.3")
+set(SHERPA_ONNX_VERSION "1.9.4")
 
 # Disable warning about
 #
@@ -106,10 +106,23 @@ endif()
 set(CMAKE_CXX_EXTENSIONS OFF)
 message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")
 
+
 include(CheckIncludeFileCXX)
-check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA)
-if(SHERPA_ONNX_HAS_ALSA)
-  add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1)
+
+# Probe for the ALSA development header only on non-Apple Unix systems.
+# SHERPA_ONNX_HAS_ALSA gates the ALSA-based executables
+# (sherpa-onnx-alsa and sherpa-onnx-offline-tts-play-alsa).
+if(UNIX AND NOT APPLE)
+  check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA)
+  if(SHERPA_ONNX_HAS_ALSA)
+    add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1)
+  else()
+    # The header is optional: warn and continue without the ALSA executables.
+    message(WARNING "\
+Could not find alsa/asoundlib.h !
+We won't build sherpa-onnx-alsa and sherpa-onnx-offline-tts-play-alsa
+To fix that, please do:
+  (1) sudo apt-get install alsa-utils libasound2-dev
+  (2) rm -rf build
+  (3) re-try
+  ")
+  endif()
+endif()
 
 check_include_file_cxx(cxxabi.h SHERPA_ONNX_HAVE_CXXABI_H)

diff --git a/cmake/cmake_extension.py b/cmake/cmake_extension.py
@@ -144,6 +144,8 @@ def build_extension(self, ext: setuptools.extension.Extension):
         binaries += ["sherpa-onnx-vad-microphone-offline-asr"]
         binaries += ["sherpa-onnx-offline-tts"]
         binaries += ["sherpa-onnx-offline-tts-play"]
+        binaries += ["sherpa-onnx-alsa"]
+        binaries += ["sherpa-onnx-offline-tts-play-alsa"]
 
         if is_windows():
             binaries += ["kaldi-native-fbank-core.dll"]
@@ -165,6 +167,11 @@ def build_extension(self, ext: setuptools.extension.Extension):
                 src_file = install_dir / "lib" / (f + suffix)
             if not src_file.is_file():
                 src_file = install_dir / ".." / (f + suffix)
+
+            if not src_file.is_file() and 'alsa' in f:
+                print(f'Skipping {f}')
+                continue
+
             print(f"Copying {src_file} to {out_bin_dir}/")
             shutil.copy(f"{src_file}", f"{out_bin_dir}/")
 

diff --git a/setup.py b/setup.py
@@ -60,6 +60,8 @@ def get_binaries_to_install():
     binaries += ["sherpa-onnx-vad-microphone-offline-asr"]
     binaries += ["sherpa-onnx-offline-tts"]
     binaries += ["sherpa-onnx-offline-tts-play"]
+    binaries += ["sherpa-onnx-alsa"]
+    binaries += ["sherpa-onnx-offline-tts-play-alsa"]
     if is_windows():
         binaries += ["kaldi-native-fbank-core.dll"]
         binaries += ["sherpa-onnx-c-api.dll"]

diff --git a/sherpa-onnx/csrc/CMakeLists.txt b/sherpa-onnx/csrc/CMakeLists.txt
@@ -207,14 +207,42 @@ install(
 
 if(SHERPA_ONNX_HAS_ALSA)
   add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc)
-  target_link_libraries(sherpa-onnx-alsa sherpa-onnx-core)
+  add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc)
 
-  if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
-    target_link_libraries(sherpa-onnx-alsa -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
-  else()
-    target_link_libraries(sherpa-onnx-alsa asound)
+  set(exes
+    sherpa-onnx-alsa
+    sherpa-onnx-offline-tts-play-alsa
+  )
+  foreach(exe IN LISTS exes)
+    target_link_libraries(${exe} sherpa-onnx-core)
+  endforeach()
+
+  foreach(exe IN LISTS exes)
+    if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
+      target_link_libraries(${exe} -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
+    else()
+      target_link_libraries(${exe} asound)
+    endif()
+  endforeach()
+
+  if(NOT WIN32)
+    foreach(exe IN LISTS exes)
+      target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib")
+      target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib")
+    endforeach()
+
+    if(SHERPA_ONNX_ENABLE_PYTHON)
+      foreach(exe IN LISTS exes)
+        target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib")
+      endforeach()
+    endif()
   endif()
-  install(TARGETS sherpa-onnx-alsa DESTINATION bin)
+
+  install(
+    TARGETS ${exes}
+    DESTINATION
+      bin
+  )
 endif()
 
 if(SHERPA_ONNX_ENABLE_PORTAUDIO)

diff --git a/sherpa-onnx/csrc/alsa-play.cc b/sherpa-onnx/csrc/alsa-play.cc
@@ -0,0 +1,150 @@
+// sherpa-onnx/csrc/alsa-play.cc
+//
+// Copyright (c)  2022-2023  Xiaomi Corporation
+
+#ifdef SHERPA_ONNX_ENABLE_ALSA
+
+#include "sherpa-onnx/csrc/alsa-play.h"
+
+#include <algorithm>
+
+namespace sherpa_onnx {
+
+// Opens `device_name` as a PCM playback stream and then calls
+// SetParameters(sample_rate). Terminates the process if the device cannot
+// be opened.
+AlsaPlay::AlsaPlay(const char *device_name, int32_t sample_rate) {
+  const int32_t open_status =
+      snd_pcm_open(&handle_, device_name, SND_PCM_STREAM_PLAYBACK, 0);
+
+  if (open_status != 0) {
+    fprintf(stderr, "Unable to open: %s. %s\n", device_name,
+            snd_strerror(open_status));
+    exit(-1);
+  }
+
+  SetParameters(sample_rate);
+}
+
+// Closes the PCM handle if one was opened. A failed close is only reported;
+// it does not terminate the process.
+AlsaPlay::~AlsaPlay() {
+  if (handle_ == nullptr) {
+    return;
+  }
+
+  const int32_t close_status = snd_pcm_close(handle_);
+  if (close_status < 0) {
+    printf("Failed to close pcm: %s\n", snd_strerror(close_status));
+  }
+}
+
+// Configures the opened PCM device for playback.
+//
+// The following hardware parameters are requested:
+//   1. access: interleaved read/write
+//   2. sample format: int16_t (little endian)
+//   3. num_channels: 1
+//   4. sample rate: the supported rate nearest to `sample_rate`
+//
+// If the device ends up running at a rate different from `sample_rate`, a
+// resampler from `sample_rate` to the device rate is created. buf_ is sized
+// to one period of the device.
+//
+// The process is terminated when the parameter space cannot be queried, when
+// the access type or sample format is unsupported, or when the parameters
+// cannot be applied. Failing to set the channel count or the rate is only
+// reported.
+void AlsaPlay::SetParameters(int32_t sample_rate) {
+  snd_pcm_hw_params_t *params;
+  snd_pcm_hw_params_alloca(&params);
+
+  int32_t err = snd_pcm_hw_params_any(handle_, params);
+  if (err < 0) {
+    printf("Can't query hardware parameters. %s\n", snd_strerror(err));
+    exit(-1);
+  }
+
+  err = snd_pcm_hw_params_set_access(handle_, params,
+                                     SND_PCM_ACCESS_RW_INTERLEAVED);
+  if (err < 0) {
+    printf("SND_PCM_ACCESS_RW_INTERLEAVED is not supported: %s\n",
+           snd_strerror(err));
+    exit(-1);
+  }
+
+  err = snd_pcm_hw_params_set_format(handle_, params, SND_PCM_FORMAT_S16_LE);
+
+  if (err < 0) {
+    printf("Can't set format to 16-bit: %s\n", snd_strerror(err));
+    exit(-1);
+  }
+
+  err = snd_pcm_hw_params_set_channels(handle_, params, 1);
+
+  if (err < 0) {
+    printf("Can't set channel number to 1: %s\n", snd_strerror(err));
+  }
+
+  uint32_t rate = sample_rate;
+  err = snd_pcm_hw_params_set_rate_near(handle_, params, &rate, 0);
+  if (err < 0) {
+    // `rate` is unsigned; cast so the argument matches the %d conversion.
+    printf("Can't set rate to %d. %s\n", static_cast<int32_t>(rate),
+           snd_strerror(err));
+  }
+
+  err = snd_pcm_hw_params(handle_, params);
+  if (err < 0) {
+    printf("Can't set hardware parameters. %s\n", snd_strerror(err));
+    exit(-1);
+  }
+
+  // Read back the rate the device actually uses. If it cannot be read, fall
+  // back to the requested rate instead of using an uninitialized value.
+  uint32_t device_rate = 0;
+  int32_t actual_sample_rate = sample_rate;
+  err = snd_pcm_hw_params_get_rate(params, &device_rate, 0);
+  if (err < 0) {
+    printf("Can't get the sample rate. %s\n", snd_strerror(err));
+  } else {
+    actual_sample_rate = static_cast<int32_t>(device_rate);
+  }
+
+  if (actual_sample_rate != sample_rate) {
+    fprintf(stderr,
+            "Creating a resampler:\n"
+            "   in_sample_rate: %d\n"
+            "   output_sample_rate: %d\n",
+            sample_rate, actual_sample_rate);
+
+    float min_freq = std::min(actual_sample_rate, sample_rate);
+    float lowpass_cutoff = 0.99 * 0.5 * min_freq;
+
+    int32_t lowpass_filter_width = 6;
+    resampler_ = std::make_unique<LinearResample>(
+        sample_rate, actual_sample_rate, lowpass_cutoff, lowpass_filter_width);
+  }
+
+  // buf_ is the staging buffer used by Play(); it must never be empty,
+  // otherwise Play() cannot make progress. Use a fixed size when the period
+  // size is unavailable or reported as zero.
+  constexpr snd_pcm_uframes_t kFallbackFrames = 1024;
+  snd_pcm_uframes_t frames = 0;
+  err = snd_pcm_hw_params_get_period_size(params, &frames, 0);
+  if (err < 0 || frames == 0) {
+    frames = kFallbackFrames;
+  }
+  buf_.resize(frames);
+}
+
+// Converts a float sample to int16_t. The sample is first limited to
+// [-1, 1] so that the scaled value always fits in int16_t.
+static int16_t FloatToInt16(float sample) {
+  float clipped = std::max(-1.0f, std::min(1.0f, sample));
+  return static_cast<int16_t>(clipped * 32767);
+}
+
+// Writes all `num_frames` frames starting at `data` to `handle`.
+//
+// A short write is continued from where it stopped. On an underrun (XRUN)
+// the stream is prepared again and the remaining frames are retried instead
+// of being dropped. Any other error terminates the process.
+//
+// One sample is treated as one frame, i.e. the data is assumed to be mono.
+static void WriteFrames(snd_pcm_t *handle, const int16_t *data,
+                        int32_t num_frames) {
+  while (num_frames > 0) {
+    snd_pcm_sframes_t written = snd_pcm_writei(handle, data, num_frames);
+    if (written == -EPIPE) {
+      printf("XRUN.\n");
+      int32_t err = snd_pcm_prepare(handle);
+      if (err < 0) {
+        printf("Can't recover from XRUN: %s\n", snd_strerror(err));
+        exit(-1);
+      }
+      continue;
+    }
+
+    if (written < 0) {
+      printf("Can't write to PCM device: %s\n",
+             snd_strerror(static_cast<int>(written)));
+      exit(-1);
+    }
+
+    data += written;
+    num_frames -= static_cast<int32_t>(written);
+  }
+}
+
+// Plays `samples` on the device.
+//
+// If a resampler exists, the samples are resampled first. They are then
+// converted to int16_t and written to the device in chunks of at most
+// buf_.size() frames.
+//
+// @param samples  Float audio samples; values outside [-1, 1] are clipped.
+void AlsaPlay::Play(const std::vector<float> &samples) {
+  std::vector<float> resampled;
+  const float *p = samples.data();
+  int32_t num_samples = samples.size();
+  if (resampler_) {
+    resampler_->Resample(samples.data(), samples.size(), false, &resampled);
+    p = resampled.data();
+    num_samples = resampled.size();
+  }
+
+  if (buf_.empty()) {
+    // A zero-sized staging buffer would make the chunk loop below never
+    // advance; fall back to a fixed chunk size.
+    buf_.resize(1024);
+  }
+
+  const int32_t chunk = buf_.size();
+  for (int32_t i = 0; i < num_samples; i += chunk) {
+    // The last chunk may be shorter than buf_.
+    const int32_t n = std::min(chunk, num_samples - i);
+    for (int32_t k = 0; k != n; ++k) {
+      buf_[k] = FloatToInt16(p[i + k]);
+    }
+
+    WriteFrames(handle_, buf_.data(), n);
+  }
+}
+
+// Calls snd_pcm_drain() on the device so that the samples already written
+// get played. A failure is only reported.
+void AlsaPlay::Drain() {
+  const int32_t drain_status = snd_pcm_drain(handle_);
+  if (drain_status >= 0) {
+    return;
+  }
+
+  printf("Failed to drain pcm. %s\n", snd_strerror(drain_status));
+}
+
+}  // namespace sherpa_onnx
+
+#endif  // SHERPA_ONNX_ENABLE_ALSA
diff --git a/sherpa-onnx/csrc/alsa-play.h b/sherpa-onnx/csrc/alsa-play.h
@@ -0,0 +1,37 @@
+// sherpa-onnx/csrc/alsa-play.h
+//
+// Copyright (c)  2022-2023  Xiaomi Corporation
+
+#ifndef SHERPA_ONNX_CSRC_ALSA_PLAY_H_
+#define SHERPA_ONNX_CSRC_ALSA_PLAY_H_
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "alsa/asoundlib.h"
+#include "sherpa-onnx/csrc/resample.h"
+
+namespace sherpa_onnx {
+
+// Plays audio samples through an ALSA PCM playback device.
+//
+// The object holds the PCM handle for its whole lifetime and closes it in
+// the destructor.
+class AlsaPlay {
+ public:
+  // @param device_name  Name of the ALSA device to open for playback.
+  // @param sample_rate  Sample rate of the samples later passed to Play().
+  AlsaPlay(const char *device_name, int32_t sample_rate);
+
+  // Closes the device if it was opened.
+  ~AlsaPlay();
+
+  // Sends `samples` to the device.
+  void Play(const std::vector<float> &samples);
+
+  // wait for all the samples to be played
+  void Drain();
+
+ private:
+  // Sets the hardware parameters of the opened device; called from the
+  // constructor.
+  void SetParameters(int32_t sample_rate);
+
+  // PCM handle; nullptr until the device has been opened.
+  snd_pcm_t *handle_ = nullptr;
+
+  // Only set when the device rate differs from the requested sample rate.
+  std::unique_ptr<LinearResample> resampler_;
+
+  // Staging buffer holding int16_t samples before they are written.
+  std::vector<int16_t> buf_;
+};
+
+}  // namespace sherpa_onnx
+
+#endif  // SHERPA_ONNX_CSRC_ALSA_PLAY_H_