Skip to content

Commit

Permalink
encoder only trt ep for transducer (#1130)
Browse files Browse the repository at this point in the history
  • Loading branch information
manickavela29 authored Jul 15, 2024
1 parent c35200d commit 11cfd33
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 7 deletions.
14 changes: 9 additions & 5 deletions sherpa-onnx/csrc/online-zipformer2-transducer-model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ namespace sherpa_onnx {
OnlineZipformer2TransducerModel::OnlineZipformer2TransducerModel(
const OnlineModelConfig &config)
: env_(ORT_LOGGING_LEVEL_WARNING),
sess_opts_(GetSessionOptions(config)),
encoder_sess_opts_(GetSessionOptions(config)),
decoder_sess_opts_(GetSessionOptions(config, "decoder")),
joiner_sess_opts_(GetSessionOptions(config, "joiner")),
config_(config),
allocator_{} {
{
Expand All @@ -57,7 +59,9 @@ OnlineZipformer2TransducerModel::OnlineZipformer2TransducerModel(
AAssetManager *mgr, const OnlineModelConfig &config)
: env_(ORT_LOGGING_LEVEL_WARNING),
config_(config),
sess_opts_(GetSessionOptions(config)),
encoder_sess_opts_(GetSessionOptions(config)),
decoder_sess_opts_(GetSessionOptions(config)),
joiner_sess_opts_(GetSessionOptions(config)),
allocator_{} {
{
auto buf = ReadFile(mgr, config.transducer.encoder);
Expand All @@ -79,7 +83,7 @@ OnlineZipformer2TransducerModel::OnlineZipformer2TransducerModel(
void OnlineZipformer2TransducerModel::InitEncoder(void *model_data,
size_t model_data_length) {
encoder_sess_ = std::make_unique<Ort::Session>(env_, model_data,
model_data_length, sess_opts_);
model_data_length, encoder_sess_opts_);

GetInputNames(encoder_sess_.get(), &encoder_input_names_,
&encoder_input_names_ptr_);
Expand Down Expand Up @@ -132,7 +136,7 @@ void OnlineZipformer2TransducerModel::InitEncoder(void *model_data,
void OnlineZipformer2TransducerModel::InitDecoder(void *model_data,
size_t model_data_length) {
decoder_sess_ = std::make_unique<Ort::Session>(env_, model_data,
model_data_length, sess_opts_);
model_data_length, decoder_sess_opts_);

GetInputNames(decoder_sess_.get(), &decoder_input_names_,
&decoder_input_names_ptr_);
Expand All @@ -157,7 +161,7 @@ void OnlineZipformer2TransducerModel::InitDecoder(void *model_data,
void OnlineZipformer2TransducerModel::InitJoiner(void *model_data,
size_t model_data_length) {
joiner_sess_ = std::make_unique<Ort::Session>(env_, model_data,
model_data_length, sess_opts_);
model_data_length, joiner_sess_opts_);

GetInputNames(joiner_sess_.get(), &joiner_input_names_,
&joiner_input_names_ptr_);
Expand Down
5 changes: 4 additions & 1 deletion sherpa-onnx/csrc/online-zipformer2-transducer-model.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ class OnlineZipformer2TransducerModel : public OnlineTransducerModel {

private:
Ort::Env env_;
Ort::SessionOptions sess_opts_;
Ort::SessionOptions encoder_sess_opts_;
Ort::SessionOptions decoder_sess_opts_;
Ort::SessionOptions joiner_sess_opts_;

Ort::AllocatorWithDefaultOptions allocator_;

std::unique_ptr<Ort::Session> encoder_sess_;
Expand Down
16 changes: 15 additions & 1 deletion sherpa-onnx/csrc/session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
std::to_string(trt_config.trt_timing_cache_enable);
auto trt_dump_subgraphs =
std::to_string(trt_config.trt_dump_subgraphs);

std::vector<TrtPairs> trt_options = {
{"device_id", device_id.c_str()},
{"trt_max_workspace_size", trt_max_workspace_size.c_str()},
Expand Down Expand Up @@ -223,6 +222,21 @@ Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config) {
config.provider_config.provider, &config.provider_config);
}

// Builds session options for one component of an online transducer model.
//
// When the configured provider is "trt", only the encoder runs with
// TensorRT: a model_type of "decoder" or "joiner" is switched to the "cuda"
// provider. Every other provider / model_type combination keeps the
// provider given in config.provider_config.
Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config,
                                      const std::string &model_type) {
  const auto &provider_config = config.provider_config;

  const bool is_decoder_or_joiner =
      model_type == "decoder" || model_type == "joiner";
  const bool fall_back_to_cuda =
      is_decoder_or_joiner && provider_config.provider == "trt";

  const std::string provider =
      fall_back_to_cuda ? std::string("cuda") : provider_config.provider;

  return GetSessionOptionsImpl(config.num_threads, provider, &provider_config);
}

// Offline models: session options are derived from the configured thread
// count and provider name only; no provider-specific config is forwarded.
Ort::SessionOptions GetSessionOptions(const OfflineModelConfig &config) {
  const auto &provider = config.provider;
  return GetSessionOptionsImpl(config.num_threads, provider);
}
Expand Down
3 changes: 3 additions & 0 deletions sherpa-onnx/csrc/session.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ namespace sherpa_onnx {

Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config);

// Returns session options for one component of an online model.
//
// @param config      Online model configuration (thread count and provider
//                    configuration are read from it).
// @param model_type  Name of the component. If the configured provider is
//                    "trt" and model_type is "decoder" or "joiner", the
//                    options are built for the "cuda" provider instead;
//                    otherwise the configured provider is used.
Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config,
const std::string &model_type);

// Returns session options for an offline model, built from
// config.num_threads and config.provider.
Ort::SessionOptions GetSessionOptions(const OfflineModelConfig &config);

Ort::SessionOptions GetSessionOptions(const OfflineLMConfig &config);
Expand Down

0 comments on commit 11cfd33

Please sign in to comment.