diff --git a/_sources/recipes/TTS/ljspeech/vits.rst.txt b/_sources/recipes/TTS/ljspeech/vits.rst.txt
index 323d0adfc8..d31bf63022 100644
--- a/_sources/recipes/TTS/ljspeech/vits.rst.txt
+++ b/_sources/recipes/TTS/ljspeech/vits.rst.txt
@@ -56,7 +56,8 @@ Training
       --start-epoch 1 \
       --use-fp16 1 \
       --exp-dir vits/exp \
-      --tokens data/tokens.txt
+      --tokens data/tokens.txt \
+      --model-type high \
       --max-duration 500
 
 .. note::
@@ -64,6 +65,11 @@ Training
     You can adjust the hyper-parameters to control the size of the VITS model and
     the training configurations. For more details, please run ``./vits/train.py --help``.
 
+.. warning::
+
+   If you want a model that runs faster on CPU, please use ``--model-type low``
+   or ``--model-type medium``.
+
 .. note::
 
     The training can take a long time (usually a couple of days).
@@ -95,8 +101,8 @@ training part first. It will save the ground-truth and generated wavs to the dir
 Export models
 -------------
 
-Currently we only support ONNX model exporting. It will generate two files in the given ``exp-dir``:
-``vits-epoch-*.onnx`` and ``vits-epoch-*.int8.onnx``.
+Currently, only ONNX export is supported. The export script generates one file in the given ``exp-dir``:
+``vits-epoch-*.onnx``.
 
 .. code-block:: bash
 
@@ -120,4 +126,7 @@ Download pretrained models
 If you don't want to train from scratch, you can download the pretrained models
 by visiting the following link:
 
-  - `<https://huggingface.co/Zengwei/icefall-tts-ljspeech-vits-2024-02-28>`_
+  - ``--model-type=high``: `<https://huggingface.co/Zengwei/icefall-tts-ljspeech-vits-2024-02-28>`_
+  - ``--model-type=medium``: `<https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-medium-2024-03-12>`_
+  - ``--model-type=low``: `<https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-low-2024-03-12>`_
+
diff --git a/recipes/TTS/ljspeech/vits.html b/recipes/TTS/ljspeech/vits.html
index 2a9b4b63b0..9f792d98da 100644
--- a/recipes/TTS/ljspeech/vits.html
+++ b/recipes/TTS/ljspeech/vits.html
@@ -152,7 +152,8 @@ <h2>Training<a class="headerlink" href="#training" title="Permalink to this head
 <span class="w">    </span>--start-epoch<span class="w"> </span><span class="m">1</span><span class="w"> </span><span class="se">\</span>
 <span class="w">    </span>--use-fp16<span class="w"> </span><span class="m">1</span><span class="w"> </span><span class="se">\</span>
 <span class="w">    </span>--exp-dir<span class="w"> </span>vits/exp<span class="w"> </span><span class="se">\</span>
-<span class="w">    </span>--tokens<span class="w"> </span>data/tokens.txt
+<span class="w">    </span>--tokens<span class="w"> </span>data/tokens.txt<span class="w"> </span><span class="se">\</span>
+<span class="w">    </span>--model-type<span class="w"> </span>high<span class="w"> </span><span class="se">\</span>
 <span class="w">    </span>--max-duration<span class="w"> </span><span class="m">500</span>
 </pre></div>
 </div>
@@ -161,6 +162,11 @@ <h2>Training<a class="headerlink" href="#training" title="Permalink to this head
 <p>You can adjust the hyper-parameters to control the size of the VITS model and
 the training configurations. For more details, please run <code class="docutils literal notranslate"><span class="pre">./vits/train.py</span> <span class="pre">--help</span></code>.</p>
 </div>
+<div class="admonition warning">
+<p class="admonition-title">Warning</p>
+<p>If you want a model that runs faster on CPU, please use <code class="docutils literal notranslate"><span class="pre">--model-type</span> <span class="pre">low</span></code>
+or <code class="docutils literal notranslate"><span class="pre">--model-type</span> <span class="pre">medium</span></code>.</p>
+</div>
 <div class="admonition note">
 <p class="admonition-title">Note</p>
 <p>The training can take a long time (usually a couple of days).</p>
@@ -187,8 +193,8 @@ <h2>Inference<a class="headerlink" href="#inference" title="Permalink to this he
 </section>
 <section id="export-models">
 <h2>Export models<a class="headerlink" href="#export-models" title="Permalink to this heading"></a></h2>
-<p>Currently we only support ONNX model exporting. It will generate two files in the given <code class="docutils literal notranslate"><span class="pre">exp-dir</span></code>:
-<code class="docutils literal notranslate"><span class="pre">vits-epoch-*.onnx</span></code> and <code class="docutils literal notranslate"><span class="pre">vits-epoch-*.int8.onnx</span></code>.</p>
+<p>Currently, only ONNX export is supported. The export script generates one file in the given <code class="docutils literal notranslate"><span class="pre">exp-dir</span></code>:
+<code class="docutils literal notranslate"><span class="pre">vits-epoch-*.onnx</span></code>.</p>
 <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>./vits/export-onnx.py<span class="w"> </span><span class="se">\</span>
 <span class="w">    </span>--epoch<span class="w"> </span><span class="m">1000</span><span class="w"> </span><span class="se">\</span>
 <span class="w">    </span>--exp-dir<span class="w"> </span>vits/exp<span class="w"> </span><span class="se">\</span>
@@ -208,7 +214,9 @@ <h2>Download pretrained models<a class="headerlink" href="#download-pretrained-m
 by visiting the following link:</p>
 <blockquote>
 <div><ul class="simple">
-<li><p><a class="reference external" href="https://huggingface.co/Zengwei/icefall-tts-ljspeech-vits-2024-02-28">https://huggingface.co/Zengwei/icefall-tts-ljspeech-vits-2024-02-28</a></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">--model-type=high</span></code>: <a class="reference external" href="https://huggingface.co/Zengwei/icefall-tts-ljspeech-vits-2024-02-28">https://huggingface.co/Zengwei/icefall-tts-ljspeech-vits-2024-02-28</a></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">--model-type=medium</span></code>: <a class="reference external" href="https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-medium-2024-03-12">https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-medium-2024-03-12</a></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">--model-type=low</span></code>: <a class="reference external" href="https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-low-2024-03-12">https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-low-2024-03-12</a></p></li>
 </ul>
 </div></blockquote>
 </section>
diff --git a/searchindex.js b/searchindex.js
index cf8d862ded..9f2d0074bc 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "decoding-with-langugage-models/LODR", "decoding-with-langugage-models/index", "decoding-with-langugage-models/rescoring", "decoding-with-langugage-models/shallow-fusion", "docker/index", "docker/intro", "faqs", "for-dummies/data-preparation", "for-dummies/decoding", "for-dummies/environment-setup", "for-dummies/index", "for-dummies/model-export", "for-dummies/training", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "model-export/export-model-state-dict", "model-export/export-ncnn", "model-export/export-ncnn-conv-emformer", "model-export/export-ncnn-lstm", "model-export/export-ncnn-zipformer", "model-export/export-onnx", "model-export/export-with-torch-jit-script", "model-export/export-with-torch-jit-trace", "model-export/index", "recipes/Finetune/from_supervised/finetune_zipformer", "recipes/Finetune/index", "recipes/Non-streaming-ASR/aishell/conformer_ctc", "recipes/Non-streaming-ASR/aishell/index", "recipes/Non-streaming-ASR/aishell/stateless_transducer", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/index", "recipes/Non-streaming-ASR/librispeech/conformer_ctc", "recipes/Non-streaming-ASR/librispeech/distillation", "recipes/Non-streaming-ASR/librispeech/index", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi", "recipes/Non-streaming-ASR/timit/index", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/yesno/index", "recipes/Non-streaming-ASR/yesno/tdnn", "recipes/RNN-LM/index", "recipes/RNN-LM/librispeech/lm-training", "recipes/Streaming-ASR/index", 
"recipes/Streaming-ASR/introduction", "recipes/Streaming-ASR/librispeech/index", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Streaming-ASR/librispeech/zipformer_transducer", "recipes/TTS/index", "recipes/TTS/ljspeech/vits", "recipes/TTS/vctk/vits", "recipes/index"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "decoding-with-langugage-models/LODR.rst", "decoding-with-langugage-models/index.rst", "decoding-with-langugage-models/rescoring.rst", "decoding-with-langugage-models/shallow-fusion.rst", "docker/index.rst", "docker/intro.rst", "faqs.rst", "for-dummies/data-preparation.rst", "for-dummies/decoding.rst", "for-dummies/environment-setup.rst", "for-dummies/index.rst", "for-dummies/model-export.rst", "for-dummies/training.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "model-export/export-model-state-dict.rst", "model-export/export-ncnn.rst", "model-export/export-ncnn-conv-emformer.rst", "model-export/export-ncnn-lstm.rst", "model-export/export-ncnn-zipformer.rst", "model-export/export-onnx.rst", "model-export/export-with-torch-jit-script.rst", "model-export/export-with-torch-jit-trace.rst", "model-export/index.rst", "recipes/Finetune/from_supervised/finetune_zipformer.rst", "recipes/Finetune/index.rst", "recipes/Non-streaming-ASR/aishell/conformer_ctc.rst", "recipes/Non-streaming-ASR/aishell/index.rst", "recipes/Non-streaming-ASR/aishell/stateless_transducer.rst", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/index.rst", "recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst", "recipes/Non-streaming-ASR/librispeech/distillation.rst", "recipes/Non-streaming-ASR/librispeech/index.rst", 
"recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi.rst", "recipes/Non-streaming-ASR/timit/index.rst", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.rst", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/yesno/index.rst", "recipes/Non-streaming-ASR/yesno/tdnn.rst", "recipes/RNN-LM/index.rst", "recipes/RNN-LM/librispeech/lm-training.rst", "recipes/Streaming-ASR/index.rst", "recipes/Streaming-ASR/introduction.rst", "recipes/Streaming-ASR/librispeech/index.rst", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Streaming-ASR/librispeech/zipformer_transducer.rst", "recipes/TTS/index.rst", "recipes/TTS/ljspeech/vits.rst", "recipes/TTS/vctk/vits.rst", "recipes/index.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "LODR for RNN Transducer", "Decoding with language models", "LM rescoring for Transducer", "Shallow fusion for Transducer", "Docker", "Introduction", "Frequently Asked Questions (FAQs)", "Data Preparation", "Decoding", "Environment setup", "Icefall for dummies tutorial", "Model Export", "Training", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Export model.state_dict()", "Export to ncnn", "Export ConvEmformer transducer models to ncnn", "Export LSTM transducer models to ncnn", "Export streaming Zipformer transducer models to ncnn", "Export to ONNX", "Export model with torch.jit.script()", "Export model with torch.jit.trace()", "Model export", "Finetune from a supervised pre-trained Zipformer model", "Fine-tune a pre-trained model", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Non Streaming ASR", 
"Conformer CTC", "Distillation with HuBERT", "LibriSpeech", "Pruned transducer statelessX", "TDNN-LSTM-CTC", "Zipformer CTC Blank Skip", "Zipformer MMI", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC", "RNN-LM", "Train an RNN language model", "Streaming ASR", "Introduction", "LibriSpeech", "LSTM Transducer", "Pruned transducer statelessX", "Zipformer Transducer", "TTS", "VITS-LJSpeech", "VITS-VCTK", "Recipes"], "terms": {"we": [0, 1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60, 61], "us": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 23, 24, 25, 26, 27, 30, 31, 33, 34, 35, 36, 38, 39, 42, 46, 47, 49, 51, 53, 59, 60], "tool": [0, 10, 21, 24], "make": [0, 1, 3, 24, 25, 26, 33, 35, 38, 53], "consist": [0, 35, 41, 55, 56, 57], "possibl": [0, 2, 3, 33, 38], "black": 0, "format": [0, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "flake8": 0, "check": [0, 21, 38, 51], "qualiti": [0, 34], "isort": 0, "sort": [0, 21, 51], "import": [0, 9, 10, 15, 21, 24, 56, 57], "The": [0, 1, 2, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 19, 21, 22, 24, 25, 26, 31, 33, 34, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "version": [0, 9, 13, 15, 20, 21, 22, 24, 25, 26, 33, 35, 36, 38, 41, 42, 46, 47, 56], "abov": [0, 4, 6, 7, 10, 13, 15, 22, 24, 25, 26, 27, 33, 34, 35, 36, 38, 41, 43, 44, 49, 53, 55, 56, 57], "ar": [0, 1, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 21, 22, 24, 25, 26, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "22": [0, 9, 15, 21, 24, 25, 38, 46, 47, 49], "3": [0, 4, 6, 7, 9, 10, 11, 15, 20, 22, 23, 27, 30, 36, 39, 41, 42, 43, 44, 49, 51, 55, 56, 57, 59, 60], "0": [0, 1, 4, 6, 7, 9, 11, 13, 15, 20, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "5": [0, 7, 15, 23, 
30, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59], "4": [0, 4, 5, 6, 7, 9, 10, 11, 13, 15, 20, 22, 23, 30, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "10": [0, 7, 9, 15, 20, 21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "1": [0, 4, 6, 7, 9, 11, 13, 15, 20, 22, 23, 27, 28, 29, 30, 31, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "after": [0, 1, 6, 9, 11, 12, 13, 16, 19, 21, 22, 24, 25, 26, 31, 32, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57], "run": [0, 2, 8, 10, 11, 13, 14, 15, 19, 20, 21, 24, 25, 26, 27, 30, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "command": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 21, 22, 24, 25, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 60], "git": [0, 4, 6, 7, 9, 13, 15, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 42, 46, 47, 49, 51], "clone": [0, 4, 6, 7, 9, 13, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 42, 46, 47, 49, 51], "http": [0, 1, 2, 4, 6, 7, 9, 10, 11, 13, 15, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "github": [0, 2, 6, 9, 11, 13, 15, 18, 21, 22, 23, 24, 25, 26, 27, 28, 29, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "com": [0, 2, 6, 9, 11, 13, 18, 19, 21, 22, 24, 25, 28, 29, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "k2": [0, 2, 9, 10, 13, 15, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 55, 56, 57], "fsa": [0, 2, 9, 13, 15, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 33, 35, 38, 41, 43, 44, 55, 56, 57], "icefal": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16, 18, 19, 22, 23, 27, 28, 29, 30, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60, 61], "cd": [0, 1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 21, 22, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 
43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "pip": [0, 1, 6, 10, 13, 15, 21, 24, 27, 35], "instal": [0, 1, 4, 6, 10, 14, 15, 17, 19, 20, 22, 23, 27, 30, 31, 39, 41, 43, 44, 49, 55, 56, 57], "pre": [0, 3, 4, 6, 7, 8, 9, 15, 17, 19, 20, 21, 23, 30, 39, 61], "commit": [0, 21], "whenev": 0, "you": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "automat": [0, 14, 19, 39], "hook": 0, "invok": 0, "fail": 0, "If": [0, 2, 4, 6, 7, 8, 9, 10, 11, 13, 15, 19, 24, 25, 26, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "ani": [0, 4, 6, 7, 13, 21, 33, 35, 36, 38, 39, 41, 43, 44, 49, 55, 56], "your": [0, 1, 2, 4, 6, 7, 9, 11, 13, 17, 19, 20, 24, 25, 26, 27, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "wa": [0, 22, 38, 42], "success": [0, 21, 24, 25], "pleas": [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 13, 14, 15, 19, 21, 23, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "fix": [0, 9, 10, 13, 24, 25, 26, 38], "issu": [0, 4, 6, 7, 10, 21, 24, 25, 38, 39, 56, 57], "report": [0, 9, 10, 39], "some": [0, 1, 4, 6, 9, 22, 24, 25, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "i": [0, 1, 2, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 19, 21, 22, 23, 24, 25, 26, 27, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57], "e": [0, 2, 4, 5, 6, 7, 13, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "modifi": [0, 23, 30, 33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "file": [0, 2, 9, 14, 15, 19, 20, 22, 24, 25, 26, 28, 29, 30, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "place": [0, 21, 22, 35, 38, 42], "so": [0, 4, 6, 7, 9, 13, 19, 20, 21, 22, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "statu": 0, 
"failur": 0, "see": [0, 1, 6, 7, 9, 15, 19, 21, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "which": [0, 2, 4, 6, 7, 9, 11, 12, 15, 19, 21, 22, 24, 25, 26, 27, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 56, 57], "ha": [0, 2, 20, 21, 23, 24, 25, 26, 27, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 53, 55, 56, 57], "been": [0, 21, 23, 24, 25, 26, 35], "befor": [0, 1, 11, 13, 15, 21, 22, 24, 25, 26, 27, 28, 31, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "further": [0, 4, 6, 7, 15], "chang": [0, 4, 6, 7, 10, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "all": [0, 9, 11, 13, 14, 18, 19, 22, 24, 25, 26, 28, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "again": [0, 24, 25, 49], "should": [0, 2, 4, 6, 11, 13, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "succe": 0, "thi": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60, 61], "time": [0, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "succeed": 0, "want": [0, 4, 6, 7, 11, 13, 15, 21, 22, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59, 60], "can": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "do": [0, 2, 4, 6, 13, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "Or": 0, "without": [0, 4, 6, 7, 9, 15, 17, 19, 33, 38], "your_changed_fil": 0, "py": [0, 2, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 21, 24, 25, 26, 27, 28, 29, 30, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "sphinx": 1, "write": [1, 2, 3], "have": [1, 2, 4, 6, 7, 8, 9, 11, 13, 18, 19, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 39, 41, 
42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "prepar": [1, 3, 4, 8, 14, 16, 20, 22, 32, 58], "environ": [1, 10, 11, 12, 14, 16, 20, 24, 25, 26, 31, 33, 34, 35, 36, 38, 39, 41, 42, 46, 47, 49, 56, 57], "doc": [1, 22, 53], "r": [1, 13, 21, 24, 25, 26, 46, 47], "requir": [1, 4, 6, 11, 13, 15, 21, 26, 31, 39, 51, 56, 57, 59, 60], "txt": [1, 4, 9, 11, 13, 15, 21, 22, 24, 25, 26, 27, 28, 29, 33, 35, 36, 38, 42, 46, 47, 49, 51, 59, 60], "set": [1, 4, 6, 7, 10, 12, 13, 16, 21, 24, 25, 26, 31, 32, 33, 35, 36, 38, 39, 41, 43, 44, 49, 51, 55, 56, 57], "up": [1, 21, 22, 24, 25, 26, 33, 36, 38, 39, 41, 42, 43, 44, 56, 57], "readi": [1, 33, 38, 39, 51], "refer": [1, 2, 5, 6, 7, 11, 13, 15, 21, 22, 23, 24, 25, 26, 28, 29, 33, 35, 36, 38, 41, 42, 43, 46, 47, 49, 51, 53, 56, 57], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 9, 15, 21, 22, 24, 25, 26, 33, 35, 38, 58], "local": [1, 9, 15, 21, 41, 43, 44, 51, 55, 56, 57], "preview": 1, "what": [1, 2, 11, 15, 21, 24, 25, 26, 35, 53], "look": [1, 2, 4, 6, 7, 14, 18, 21, 24, 25, 26, 33, 35, 36, 38, 39], "like": [1, 2, 9, 11, 19, 24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 49, 53, 55, 56], "publish": [1, 22, 34], "html": [1, 2, 10, 11, 13, 15, 21, 23, 24, 25, 26, 27, 28, 29, 41, 55, 56, 57], "gener": [1, 6, 9, 14, 15, 22, 24, 25, 26, 27, 28, 29, 32, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57, 59, 60], "view": [1, 8, 20, 24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56, 57], "follow": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "python3": [1, 9, 10, 13, 15, 21, 25, 26], "m": [1, 15, 21, 24, 25, 26, 35, 41, 43, 44, 46, 47, 55, 56, 57], "server": [1, 19, 55], "It": [1, 2, 6, 7, 9, 11, 14, 15, 17, 21, 23, 24, 25, 26, 27, 28, 29, 33, 34, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59, 60], "print": [1, 12, 16, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], 
"serv": [1, 41, 43, 44, 55, 56, 57], "port": [1, 14, 31, 39, 41, 43, 44, 55, 56, 57], "8000": [1, 11, 15, 49], "open": [1, 4, 6, 7, 9, 20, 22, 24, 25, 26, 34, 35, 38, 39], "browser": [1, 17, 19, 41, 43, 44, 55, 56, 57], "go": [1, 7, 33, 35, 38, 41, 43, 44, 55, 56, 57], "read": [2, 11, 15, 21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "code": [2, 3, 8, 10, 13, 15, 20, 21, 24, 25, 26, 33, 38, 39, 41, 42, 46, 47, 49, 53, 56, 57], "style": [2, 3, 20], "adjust": [2, 51, 59, 60], "design": 2, "python": [2, 9, 13, 15, 21, 22, 24, 25, 26, 27, 28, 29, 33, 35, 38, 41, 43, 44, 51, 55, 56, 57, 59, 60], "recommend": [2, 6, 7, 9, 21, 31, 33, 35, 36, 38, 39, 41, 56, 57], "test": [2, 4, 9, 15, 20, 22, 23, 30, 31, 33, 35, 36, 38, 39, 42, 43, 46, 47, 51, 59, 60], "valid": [2, 21, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "dataset": [2, 10, 11, 13, 14, 21, 22, 31, 32, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59, 60], "lhots": [2, 9, 11, 13, 15, 20, 22, 24, 25, 26, 33, 35, 38], "readthedoc": [2, 11, 21], "io": [2, 9, 11, 13, 15, 21, 23, 24, 25, 26, 27, 28, 29, 41, 55, 56, 57], "en": [2, 11, 21, 24], "latest": [2, 9, 11, 13, 19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "index": [2, 21, 23, 24, 25, 26, 27, 28, 29, 55, 56, 57], "yesno": [2, 8, 10, 11, 12, 13, 14, 15, 16, 20, 21, 37, 49, 61], "veri": [2, 3, 7, 13, 24, 25, 26, 31, 35, 46, 47, 49, 56, 57], "good": [2, 7], "exampl": [2, 11, 13, 19, 20, 22, 24, 25, 26, 28, 29, 30, 39, 42, 46, 47, 49], "speech": [2, 11, 13, 14, 19, 20, 21, 23, 32, 34, 35, 49, 59, 60, 61], "pull": [2, 4, 6, 7, 9, 24, 25, 26, 27, 31, 33, 35, 38, 51, 53], "380": [2, 24, 47], "show": [2, 4, 6, 7, 9, 15, 19, 21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "add": [2, 11, 24, 25, 26, 33, 35, 36, 56, 61], "new": [2, 3, 9, 13, 19, 21, 24, 25, 26, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 49, 55, 56, 57], "suppos": [2, 9, 56, 
57], "would": [2, 11, 22, 24, 25, 26, 38, 42, 56, 57], "name": [2, 9, 10, 13, 15, 22, 24, 25, 26, 27, 33, 35, 41, 43, 44, 51, 56, 57], "foo": [2, 29, 33, 38, 41, 43, 44, 55, 56, 57], "eg": [2, 9, 10, 11, 12, 15, 16, 18, 21, 22, 24, 25, 26, 27, 28, 29, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "mkdir": [2, 9, 24, 25, 33, 35, 36, 38, 42, 46, 47, 49], "p": [2, 4, 13, 21, 24, 25, 35, 46, 47], "asr": [2, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 29, 31, 32, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 61], "touch": 2, "sh": [2, 9, 11, 21, 22, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "chmod": 2, "x": [2, 4, 26, 53], "simpl": [2, 12, 14, 16, 21, 35, 51], "own": [2, 11, 31, 39, 41, 51, 56, 57], "otherwis": [2, 24, 25, 26, 31, 33, 35, 38, 39, 41, 43, 44, 55, 56, 57], "librispeech": [2, 4, 6, 7, 10, 18, 20, 22, 24, 25, 26, 27, 28, 29, 31, 37, 38, 39, 41, 42, 43, 44, 51, 52, 53, 55, 56, 57, 61], "assum": [2, 4, 15, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 39, 41, 42, 46, 47, 49, 51, 55, 56, 57], "fanci": 2, "call": [2, 10, 27, 39, 51], "bar": [2, 29, 33, 38, 41, 43, 44, 55, 56, 57], "organ": 2, "wai": [2, 3, 15, 30, 41, 43, 44, 53, 55, 56, 57], "readm": [2, 33, 35, 36, 38, 42, 46, 47, 49], "md": [2, 18, 22, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "asr_datamodul": [2, 9, 10, 15, 21], "pretrain": [2, 4, 6, 7, 15, 22, 24, 25, 26, 27, 29, 31, 33, 35, 36, 38, 42, 46, 47, 49, 58], "For": [2, 4, 6, 7, 9, 10, 14, 18, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "instanc": [2, 9, 10, 12, 16, 18, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "tdnn": [2, 9, 10, 12, 15, 16, 21, 34, 37, 40, 45, 48], "its": [2, 4, 22, 23, 24, 25, 26, 29, 35, 43, 51], "directori": [2, 9, 11, 13, 20, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "structur": [2, 26], 
"descript": [2, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "contain": [2, 8, 11, 13, 14, 15, 20, 22, 23, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 61], "inform": [2, 4, 6, 11, 12, 16, 21, 22, 33, 35, 36, 38, 41, 42, 43, 46, 47, 49, 53, 55, 56, 57], "g": [2, 4, 5, 6, 7, 11, 13, 21, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "wer": [2, 5, 9, 12, 15, 21, 22, 31, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "etc": [2, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "provid": [2, 11, 15, 19, 21, 22, 23, 24, 25, 26, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 61], "pytorch": [2, 10, 13, 21, 24, 25, 26, 35], "dataload": [2, 21], "take": [2, 7, 9, 22, 39, 41, 49, 51, 56, 57, 59, 60], "input": [2, 22, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47, 49, 53], "checkpoint": [2, 4, 6, 7, 12, 15, 21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "save": [2, 15, 16, 21, 22, 25, 26, 28, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "dure": [2, 4, 5, 7, 10, 13, 19, 22, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "stage": [2, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "": [2, 4, 6, 7, 9, 14, 15, 16, 21, 22, 24, 25, 26, 27, 28, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "definit": [2, 24, 25], "neural": [2, 4, 6, 7, 33, 38, 51], "network": [2, 33, 35, 38, 41, 43, 44, 51, 55, 56, 57], "script": [2, 6, 7, 13, 14, 20, 21, 29, 30, 33, 35, 36, 38, 39, 42, 46, 47, 49, 51, 55], "infer": [2, 22, 24, 25, 58], "tdnn_lstm_ctc": [2, 36, 42, 47], "conformer_ctc": [2, 33, 38], "get": [2, 9, 13, 14, 15, 19, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 49, 53, 55, 56, 57], "feel": [2, 39, 51, 55], "result": [2, 4, 7, 9, 16, 18, 19, 22, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "everi": [2, 22, 41, 43, 44, 55, 56, 57], 
"kept": [2, 41, 56, 57], "self": [2, 23, 26, 53], "toler": 2, "duplic": 2, "among": [2, 21], "differ": [2, 9, 12, 21, 24, 25, 26, 27, 31, 32, 33, 34, 38, 39, 41, 53, 55, 56, 57], "invoc": [2, 24, 25], "help": [2, 12, 14, 16, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "blob": [2, 11, 18, 21, 22, 29, 41, 43, 44, 55, 56, 57], "master": [2, 6, 9, 11, 15, 18, 21, 22, 25, 26, 28, 29, 31, 35, 39, 41, 43, 44, 55, 56, 57], "transform": [2, 6, 7, 33, 38, 55], "conform": [2, 28, 34, 35, 37, 40, 41, 43, 55, 56, 57], "base": [2, 4, 7, 13, 26, 31, 33, 35, 36, 38, 39, 41, 43, 44, 51, 55, 56, 57], "lstm": [2, 23, 29, 30, 34, 37, 40, 45, 52, 54], "attent": [2, 26, 35, 36, 39, 53, 56, 57], "lm": [2, 4, 5, 7, 9, 11, 20, 21, 35, 41, 42, 46, 47, 49, 51, 56, 57, 61], "rescor": [2, 5, 20, 36, 42, 44, 46, 47, 49, 51], "demonstr": [2, 14, 15, 17, 19, 22, 27], "consid": [2, 4, 26, 31], "colab": [2, 21], "notebook": [2, 21], "welcom": 3, "There": [3, 4, 15, 24, 25, 26, 27, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "mani": [3, 12, 21, 56, 57], "two": [3, 4, 11, 14, 15, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59, 60], "them": [3, 5, 6, 17, 18, 19, 24, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "To": [3, 4, 5, 6, 7, 9, 11, 15, 19, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "document": [3, 20, 22, 23, 24, 25, 26, 27, 44], "repositori": [3, 9, 24, 25, 26, 27], "recip": [3, 4, 6, 7, 9, 11, 15, 18, 20, 21, 22, 27, 31, 33, 35, 36, 38, 39, 41, 42, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "In": [3, 4, 6, 10, 15, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 32, 33, 35, 36, 38, 39, 42, 46, 47, 49, 53], "page": [3, 19, 28, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 61], "describ": [3, 5, 8, 9, 17, 22, 24, 25, 27, 28, 29, 30, 33, 35, 36, 38, 41, 42, 46, 47, 56, 57], "how": [3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 17, 19, 20, 21, 24, 25, 26, 27, 30, 31, 33, 35, 36, 38, 
39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "creat": [3, 4, 6, 7, 14, 15, 20, 22, 24, 25, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56], "data": [3, 4, 6, 7, 8, 13, 14, 15, 16, 20, 22, 24, 25, 26, 27, 28, 29, 32, 34, 51, 58], "train": [3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 17, 19, 20, 22, 23, 28, 29, 30, 50, 53, 58, 61], "decod": [3, 4, 8, 10, 11, 14, 15, 19, 20, 24, 25, 26, 29, 30, 31, 51], "model": [3, 4, 6, 7, 9, 11, 12, 14, 17, 19, 20, 21, 23, 39, 50, 53, 58, 61], "As": [4, 5, 6, 7, 24, 35, 38, 39, 51], "type": [4, 6, 7, 9, 11, 15, 21, 22, 24, 25, 26, 33, 35, 38, 41, 43, 44, 49, 53, 55, 56, 57], "e2": [4, 7, 21, 51], "usual": [4, 6, 7, 12, 33, 35, 36, 38, 39, 41, 43, 44, 51, 55, 56, 57, 59, 60], "an": [4, 5, 6, 7, 9, 11, 13, 15, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 33, 34, 35, 38, 39, 41, 44, 49, 50, 55, 56, 57, 59, 60, 61], "intern": [4, 5], "languag": [4, 7, 11, 19, 20, 33, 35, 36, 50, 61], "learn": [4, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "level": [4, 5, 15, 51], "corpu": [4, 6, 7, 34, 51], "real": 4, "life": 4, "scenario": 4, "often": [4, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "mismatch": [4, 31, 56], "between": [4, 7, 41, 56, 57], "target": [4, 19, 21], "space": [4, 17, 20, 51], "problem": [4, 6, 7, 21, 39], "when": [4, 6, 9, 10, 15, 19, 24, 25, 26, 30, 35, 38, 39, 41, 43, 44, 51, 56, 57], "act": 4, "against": [4, 21], "extern": [4, 5, 6, 7], "tutori": [4, 5, 6, 7, 13, 15, 20, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 51, 55, 56, 57, 59, 60], "low": [4, 24, 25, 32], "order": [4, 13, 21, 24, 25, 26, 33, 36, 38, 42, 46, 47], "densiti": 4, "ratio": 4, "allevi": 4, "effect": [4, 7, 26], "improv": [4, 5, 6, 7, 32, 35, 51], "perform": [4, 6, 7, 23, 31, 32, 35, 39, 56], "languga": 4, "integr": [4, 19], "pruned_transducer_stateless7_stream": [4, 6, 7, 26, 27, 57], "stream": [4, 6, 7, 15, 20, 23, 24, 25, 27, 30, 33, 38, 46, 47, 55, 61], "howev": [4, 6, 7, 22, 25, 32, 39], "easili": 
[4, 6, 7, 33, 36, 38], "appli": [4, 6, 7, 35, 53], "other": [4, 7, 9, 13, 14, 15, 22, 25, 26, 27, 35, 38, 39, 41, 42, 46, 47, 49, 53, 56, 57, 61], "encount": [4, 6, 7, 10, 21, 26, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "here": [4, 6, 7, 22, 24, 25, 26, 33, 35, 36, 38, 39, 42, 53, 56], "simplic": [4, 6, 7], "same": [4, 6, 7, 21, 22, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "domain": [4, 6, 7, 31, 32], "gigaspeech": [4, 6, 7, 18, 28, 31, 55], "first": [4, 6, 9, 10, 11, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "let": [4, 6, 7, 14, 21, 24, 25, 26, 31, 33, 38, 51], "background": 4, "predecessor": 4, "dr": 4, "propos": [4, 35, 53, 57], "address": [4, 9, 15, 19, 21, 22, 24, 25, 26, 35, 41, 44, 55, 56, 57], "sourc": [4, 11, 13, 21, 22, 24, 25, 26, 33, 34, 35, 38], "acoust": [4, 56, 57], "similar": [4, 5, 31, 39, 43, 56, 57], "deriv": 4, "formula": 4, "bay": 4, "theorem": 4, "text": [4, 6, 7, 11, 16, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "score": [4, 5, 7, 33, 38, 41, 56, 57], "left": [4, 24, 26, 35, 56, 57], "y_u": 4, "mathit": 4, "y": 4, "right": [4, 24, 35, 53, 56], "log": [4, 9, 10, 12, 15, 16, 21, 24, 25, 26, 42, 46, 47, 49, 59, 60], "y_": 4, "u": [4, 21, 24, 25, 26, 33, 35, 36, 38, 39, 49], "lambda_1": 4, "p_": 4, "lambda_2": 4, "where": [4, 9, 10, 56], "weight": [4, 15, 33, 36, 38, 43, 44, 51, 55], "respect": 4, "onli": [4, 6, 8, 11, 13, 14, 15, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59, 60], "compar": [4, 24, 25, 26, 31, 56], "shallow": [4, 5, 20, 51], "fusion": [4, 5, 20, 51], "subtract": [4, 5], "work": [4, 9, 13, 15, 24, 25, 26, 38], "treat": [4, 25, 26], "predictor": 4, "joiner": [4, 24, 25, 26, 27, 29, 31, 35, 41, 55, 56, 57], "weak": 4, "captur": 4, "therefor": [4, 10], "n": [4, 5, 6, 11, 21, 33, 39, 41, 43, 44, 46, 47, 55, 56, 57], "gram": [4, 6, 21, 33, 35, 36, 41, 42, 44, 46, 47, 
56, 57], "approxim": [4, 5], "ilm": 4, "lead": [4, 7, 12], "rnnt": [4, 41, 56, 57], "bi": [4, 6], "addit": [4, 32], "estim": 4, "li": 4, "choic": 4, "accord": [4, 51], "origin": [4, 5, 31, 32], "paper": [4, 5, 39, 41, 55, 56, 57, 59, 60], "achiev": [4, 6, 7, 31, 51, 53], "both": [4, 31, 41, 43, 44, 53, 55, 56, 57], "intra": 4, "cross": 4, "much": [4, 24, 25, 31], "faster": [4, 6], "evalu": 4, "now": [4, 6, 9, 13, 15, 21, 24, 25, 26, 33, 38, 39, 41, 42, 43, 44, 46, 47, 51, 55, 56, 57], "illustr": [4, 6, 7, 31, 51], "purpos": [4, 6, 7, 24, 25, 31, 51], "from": [4, 6, 7, 9, 10, 11, 14, 15, 17, 19, 20, 21, 22, 24, 25, 26, 27, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60, 61], "link": [4, 6, 7, 18, 21, 22, 23, 41, 43, 44, 55, 56, 57, 59, 60], "scratch": [4, 6, 7, 31, 41, 43, 44, 51, 55, 56, 57, 59, 60], "prune": [4, 6, 7, 22, 26, 27, 35, 37, 39, 40, 52, 53, 54, 55, 57], "statelessx": [4, 6, 7, 37, 39, 40, 52, 53, 54], "initi": [4, 6, 7, 9, 31, 33, 36], "step": [4, 6, 7, 11, 14, 21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 43, 44, 49, 51, 55, 56, 57], "download": [4, 6, 7, 8, 10, 13, 15, 19, 20, 23, 30, 31, 34, 39, 51, 58], "git_lfs_skip_smudg": [4, 6, 7, 24, 25, 26, 27, 31, 51], "huggingfac": [4, 6, 7, 18, 20, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 42, 43, 44, 46, 47, 49, 51, 55, 59, 60], "co": [4, 6, 7, 18, 19, 21, 22, 24, 25, 26, 27, 31, 33, 34, 35, 36, 38, 42, 43, 44, 46, 47, 49, 51, 55, 59, 60], "zengwei": [4, 6, 7, 24, 26, 27, 31, 44, 51, 55, 59], "stateless7": [4, 6, 7, 26, 27], "2022": [4, 6, 7, 22, 24, 25, 26, 27, 35, 41, 43, 44, 55, 56], "12": [4, 6, 7, 9, 14, 21, 22, 24, 25, 26, 27, 33, 35, 36, 38, 41, 43, 44, 46, 49, 55, 56, 57, 60], "29": [4, 6, 7, 21, 26, 27, 33, 35, 36, 38, 42, 43, 46, 47], "exp": [4, 6, 7, 9, 15, 16, 21, 22, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "lf": [4, 6, 7, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 42, 44, 46, 47, 49, 51], 
"includ": [4, 6, 7, 24, 25, 26, 27, 31, 41, 43, 44, 51, 55, 56, 57], "pt": [4, 6, 7, 9, 11, 15, 21, 22, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "ln": [4, 6, 7, 9, 15, 22, 24, 25, 26, 27, 31, 33, 38, 41, 43, 44, 51, 55, 56, 57], "epoch": [4, 6, 7, 9, 12, 15, 16, 21, 22, 24, 25, 26, 27, 28, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "99": [4, 6, 7, 15, 21, 24, 25, 26, 27, 31], "symbol": [4, 5, 6, 7, 21, 35, 41, 56, 57], "load": [4, 6, 7, 9, 15, 21, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "lang_bpe_500": [4, 6, 7, 22, 24, 25, 26, 27, 28, 29, 31, 38, 41, 43, 44, 51, 55, 56, 57], "bpe": [4, 5, 6, 7, 22, 24, 25, 26, 27, 29, 31, 38, 41, 43, 44, 51, 55, 56, 57], "done": [4, 6, 7, 9, 13, 15, 21, 22, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "via": [4, 6, 7, 14, 21, 23, 28, 29, 30, 31, 51], "exp_dir": [4, 6, 7, 9, 15, 21, 24, 25, 26, 35, 38, 39, 41, 43, 44, 56, 57], "avg": [4, 6, 7, 9, 12, 15, 21, 22, 24, 25, 26, 27, 28, 29, 31, 35, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "averag": [4, 6, 7, 9, 12, 15, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "fals": [4, 6, 7, 9, 15, 21, 22, 24, 25, 26, 31, 33, 35, 38, 39], "dir": [4, 6, 7, 22, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "max": [4, 6, 7, 21, 22, 24, 25, 31, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57, 59, 60], "durat": [4, 6, 7, 11, 22, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "600": [4, 6, 7, 21, 22, 38, 41, 43, 55, 56, 57], "chunk": [4, 6, 7, 24, 26, 27, 56, 57], "len": [4, 6, 7, 26, 27, 57], "32": [4, 6, 7, 21, 24, 25, 26, 27, 33, 35, 36, 57], "method": [4, 5, 7, 15, 19, 22, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 55, 56, 57], "modified_beam_search": [4, 5, 6, 7, 19, 35, 39, 41, 43, 55, 56, 57], "clean": [4, 9, 15, 21, 26, 33, 35, 38, 39, 41, 42, 
43, 44, 55, 56, 57], "beam_size_4": [4, 6, 7], "11": [4, 6, 7, 9, 10, 11, 15, 21, 24, 25, 27, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "best": [4, 5, 6, 7, 24, 25, 26, 31, 33, 36, 38], "7": [4, 6, 7, 9, 21, 22, 23, 26, 30, 33, 36, 38, 41, 42, 46, 47, 55, 56], "93": [4, 6, 7, 15], "Then": [4, 6], "necessari": [4, 39, 51], "note": [4, 5, 6, 7, 10, 11, 15, 22, 24, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "960": [4, 31, 38, 41, 43, 44, 55, 56, 57], "hour": [4, 13, 31, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "ezerhouni": [4, 6, 7], "pushd": [4, 6, 7, 27], "popd": [4, 6, 7, 27], "marcoyang": [4, 6], "librispeech_bigram": [4, 6], "2gram": [4, 6], "fst": [4, 11, 21, 35, 49], "modified_beam_search_lm_lodr": 4, "lm_dir": [4, 6, 7, 9, 21, 38], "lm_scale": [4, 6, 7], "42": [4, 9, 15, 21, 25, 33, 38, 49], "lodr_scal": 4, "24": [4, 9, 10, 13, 15, 21, 24, 25, 36, 42, 46, 47, 49], "modified_beam_search_lodr": [4, 5, 6], "scale": [4, 6, 7, 24, 25, 33, 38, 39, 42, 44, 46, 47], "embed": [4, 6, 7, 35, 41, 51, 55, 56, 57], "dim": [4, 6, 7, 24, 25, 26, 35, 41, 51, 56], "2048": [4, 6, 7, 22, 24, 25, 26, 35, 51], "hidden": [4, 6, 7, 25, 51, 55], "num": [4, 6, 7, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 43, 44, 51, 55, 56, 57, 59, 60], "layer": [4, 6, 7, 24, 25, 26, 35, 39, 41, 51, 53, 55, 56, 57], "vocab": [4, 6, 7, 38], "500": [4, 6, 7, 22, 24, 25, 26, 35, 38, 44, 55, 59, 60], "token": [4, 11, 22, 24, 25, 26, 27, 28, 29, 33, 35, 36, 38, 42, 46, 47, 49, 51, 59, 60], "ngram": [4, 38, 42, 46, 47], "2": [4, 6, 7, 9, 11, 13, 15, 20, 22, 23, 30, 31, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "extra": [4, 24, 25, 26, 35, 53, 56], "argument": [4, 7, 15, 31, 39, 53], "need": [4, 6, 11, 13, 14, 15, 19, 21, 22, 23, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57], "given": [4, 9, 11, 12, 13, 15, 21, 22, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 56, 57, 59, 60], "specifi": [4, 7, 10, 12, 15, 16, 24, 25, 26, 33, 
35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "neg": [4, 35], "number": [4, 7, 16, 19, 22, 24, 25, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "obtain": [4, 7, 33, 35, 36, 38, 42, 46, 47], "shown": [4, 7], "below": [4, 7, 9, 11, 12, 13, 14, 15, 16, 21, 24, 25, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56], "61": [4, 6], "6": [4, 6, 7, 9, 10, 11, 15, 23, 30, 33, 35, 38, 41, 42, 46, 47, 55, 60], "74": [4, 6, 21, 22], "recal": 4, "lowest": [4, 12, 15, 41, 43, 44, 55, 56, 57], "77": [4, 6, 7, 21, 38], "08": [4, 6, 7, 9, 15, 26, 38, 42, 44, 46, 47, 49, 55], "inde": 4, "even": [4, 19, 21, 25], "better": [4, 6], "increas": [4, 6, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "8": [4, 6, 7, 9, 10, 15, 21, 22, 24, 25, 26, 33, 35, 38, 39, 41, 42, 43, 44, 49, 55, 56, 57], "45": [4, 6, 15, 21, 24, 26, 33, 35, 38], "38": [4, 6, 21, 24, 33, 35, 38, 46], "23": [4, 6, 9, 10, 11, 15, 21, 24, 25, 26, 33, 35, 36, 38, 46, 47, 49], "section": [5, 8, 9, 10, 17, 21, 22, 27, 28, 29, 30, 33, 38], "langugag": 5, "transduc": [5, 20, 22, 23, 27, 30, 31, 34, 37, 39, 40, 51, 52, 53, 54], "rnn": [5, 6, 7, 20, 25, 35, 41, 43, 55, 56, 57, 61], "avail": [5, 6, 8, 15, 20, 21, 22, 24, 25, 26, 31, 32, 33, 35, 38, 42, 46, 47, 49, 55], "beam": [5, 22, 55], "search": [5, 6, 7, 18, 19, 58], "realli": [5, 33, 36, 38, 41, 43, 44, 55, 56, 57], "valu": [5, 7, 24, 25, 26, 31, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "t": [5, 13, 14, 15, 21, 24, 25, 26, 27, 28, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "doe": [5, 15, 24, 25, 26, 33, 35, 38, 49], "modified_beam_search_lm_shallow_fus": [5, 6, 7], "interpol": 5, "also": [5, 6, 7, 11, 13, 14, 15, 17, 18, 21, 22, 23, 24, 25, 26, 27, 29, 33, 35, 36, 38, 41, 43, 44, 49, 51, 53, 55, 56, 57], "known": 5, "bigram": 5, "backoff": 5, "modified_beam_search_lm_rescor": [5, 6], "hypothes": [5, 6], "rnnlm": [5, 6, 51], "re": [5, 6, 10, 33, 36, 38, 39, 41, 43, 44, 53, 55, 56, 57], "rank": [5, 6], 
"modified_beam_search_lm_rescore_lodr": [5, 6], "lodr": [5, 20, 51], "commonli": [6, 7, 33, 35, 36, 38, 42, 46, 47, 49], "approach": 6, "incorpor": 6, "unlik": 6, "more": [6, 14, 21, 24, 25, 26, 33, 38, 39, 49, 51, 53, 55, 56, 59, 60], "effici": [6, 7, 41, 56, 57], "than": [6, 21, 22, 25, 33, 35, 36, 38, 41, 42, 43, 44, 49, 55, 56, 57], "sinc": [6, 13, 21, 24, 25, 26, 31, 39, 49, 55], "less": [6, 22, 38, 42, 49, 56, 57], "comput": [6, 15, 21, 22, 24, 25, 26, 33, 35, 36, 39, 41, 42, 44, 46, 47, 49, 55, 56, 57], "gpu": [6, 7, 8, 13, 14, 20, 21, 24, 25, 31, 33, 35, 36, 38, 39, 41, 43, 44, 46, 47, 49, 55, 56, 57], "try": [6, 10, 12, 15, 17, 19, 39, 41, 43, 44, 55, 56, 57], "might": [6, 7, 25, 26, 56, 57], "ideal": [6, 7], "mai": [6, 7, 9, 21, 24, 25, 26, 31, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57, 61], "With": [6, 21], "43": [6, 9, 25, 26, 38], "great": 6, "made": [6, 24], "boost": [6, 7], "tabl": [6, 19, 24, 25, 26], "67": [6, 21], "59": [6, 15, 21, 24, 36, 38], "86": 6, "fact": 6, "arpa": [6, 11, 49], "performn": 6, "depend": [6, 14, 15, 21, 33, 38], "kenlm": 6, "kpu": 6, "archiv": [6, 51], "zip": 6, "execut": [6, 7, 13, 24, 33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "9": [6, 9, 21, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 49, 55, 56, 57], "57": [6, 21, 25, 38, 42], "slightli": 6, "63": [6, 35], "04": [6, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47], "52": [6, 21, 33, 38], "73": 6, "mention": [6, 53], "earlier": 6, "benchmark": [6, 35], "speed": [6, 24, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "132": 6, "95": [6, 34], "177": [6, 21, 22, 25, 26, 35, 36, 38], "96": [6, 21], "210": [6, 46, 47], "262": [6, 7, 15], "62": [6, 7, 21, 38, 42], "65": [6, 7, 21, 24], "352": [6, 7, 38], "58": [6, 7, 10, 21, 38], "488": [6, 7, 24, 25, 26], "400": [6, 9, 34], "610": 6, "870": 6, "156": [6, 15], "203": [6, 15, 22, 38], "255": [6, 25, 26], "160": [6, 15], "263": [6, 9, 15, 21, 25], "singl": [6, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "32g": 6, 
"v100": [6, 33, 35, 36, 38], "vari": 6, "word": [7, 11, 12, 15, 33, 35, 36, 38, 42, 46, 47, 49, 51], "error": [7, 9, 10, 12, 13, 15, 21, 24, 25, 26, 38], "rate": [7, 12, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "These": [7, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "alreadi": [7, 11, 13, 21, 22, 32], "But": [7, 24, 41, 43, 44, 55, 56, 57], "long": [7, 24, 51, 59, 60], "true": [7, 9, 15, 21, 22, 24, 25, 26, 31, 33, 35, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "either": [7, 15, 19, 33, 35, 36, 38, 56, 57], "choos": [7, 19, 21, 39, 41, 43, 44, 55, 56, 57], "three": [7, 15, 24, 25, 26, 29, 33, 35, 53], "associ": 7, "dimens": [7, 41, 51, 56, 57], "obviou": 7, "rel": [7, 32], "reduct": [7, 15, 21, 24, 25, 43], "around": [7, 31], "A": [7, 14, 22, 24, 25, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 55, 56, 57], "few": [7, 11, 24, 25, 26, 39], "paramet": [7, 14, 22, 24, 25, 26, 28, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 51, 55, 56, 57, 59, 60], "tune": [7, 20, 24, 25, 26, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57, 61], "control": [7, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "too": 7, "small": [7, 31, 32, 35, 46, 47, 49], "fulli": 7, "util": [7, 9, 10, 15, 21, 38], "larg": [7, 13], "domin": 7, "bad": 7, "typic": [7, 33, 35, 36, 38], "activ": [7, 13, 19, 21], "path": [7, 9, 15, 19, 21, 22, 24, 25, 26, 29, 31, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "trade": 7, "off": [7, 24], "accuraci": [7, 24, 25, 32, 34], "larger": [7, 25, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "slower": 7, "built": [8, 9, 21], "imag": [8, 20], "cpu": [8, 12, 13, 14, 15, 16, 20, 21, 22, 24, 25, 26, 28, 33, 41, 43, 44, 49, 56, 57], "still": [8, 24, 25, 26, 32], "introduct": [8, 20, 52, 61], "tag": [8, 20], "cuda": [8, 10, 15, 20, 22, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 55, 56, 57], "enabl": [8, 21, 39], "within": [8, 14, 17, 19, 20, 24, 25], "updat": [8, 24, 25, 26], "host": [9, 22], "hub": 9, "k2fsa": 9, "find": 
[9, 10, 16, 17, 18, 19, 22, 24, 25, 26, 29, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "dockerfil": 9, "tree": [9, 11, 28, 29, 33, 35, 36, 38, 42, 46, 47, 49, 55], "item": [9, 14], "curl": 9, "registri": 9, "v2": [9, 26, 33, 38], "jq": 9, "give": [9, 11, 15, 35], "someth": [9, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "torch2": [9, 13, 15], "cuda12": 9, "cuda11": [9, 10, 21], "torch1": [9, 10, 21], "cuda10": 9, "13": [9, 10, 15, 21, 22, 24, 25, 26, 31, 35, 36, 38, 42, 43, 46], "releas": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38], "torch": [9, 10, 13, 14, 20, 22, 23, 30, 33, 35, 38], "select": [9, 12, 13, 14, 19, 21, 24, 25, 26, 41, 42, 46, 47, 49, 55, 56, 57], "appropri": [9, 21], "combin": [9, 12, 24, 25, 26], "visit": [9, 18, 19, 41, 43, 44, 55, 56, 57, 59, 60], "pkg": 9, "py3": [9, 10, 21], "v1": [9, 33, 36, 38, 42, 46, 47], "current": [9, 19, 24, 25, 35, 39, 53, 55, 56, 57, 59, 60, 61], "ghcr": 9, "alwai": [9, 21, 22], "sudo": [9, 33, 36], "rm": 9, "bin": [9, 13, 21, 24, 25, 26, 33, 38], "bash": 9, "start": [9, 11, 12, 14, 15, 16, 19, 21, 22, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "interfac": 9, "present": [9, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "root": [9, 24, 25, 26, 51], "60c947eac59c": 9, "workspac": 9, "export": [9, 10, 11, 12, 13, 14, 16, 20, 21, 33, 35, 36, 38, 39, 42, 46, 47, 49, 58], "pythonpath": [9, 11, 12, 13, 15, 16, 21, 24, 25, 26], "user": [9, 10], "copi": [9, 21, 53], "switch": [9, 21, 33, 38, 44], "opt": 9, "conda": [9, 10], "lib": [9, 10, 15, 21, 26], "site": [9, 10, 15, 21, 26], "packag": [9, 10, 15, 21, 26, 59, 60], "__init__": [9, 10, 15, 21, 22, 24, 25, 26, 33, 35, 38], "line": [9, 10, 11, 24, 25, 26, 41, 51, 56, 57], "modul": [9, 13, 20, 24, 26, 43, 56], "_k2": [9, 10, 21], "determinizeweightpushingtyp": [9, 10], "importerror": [9, 20], "libcuda": 9, "cannot": [9, 20, 24, 25, 26], "share": [9, 20, 21], "object": [9, 20, 21, 33, 35, 36, 41, 49, 55, 56], "No": [9, 13, 20, 24, 25, 26, 49], 
"stub": 9, "list": [9, 15, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47], "16": [9, 15, 21, 22, 24, 25, 26, 29, 33, 35, 36, 38, 41, 42, 46, 47, 49, 55, 56, 57], "second": [9, 14, 33, 35, 36, 38, 39, 41, 43, 44, 49, 55, 56, 57], "2023": [9, 15, 21, 24, 25, 26, 31, 43, 51, 60], "01": [9, 11, 15, 21, 24, 35, 36, 38, 39, 43], "02": [9, 11, 21, 22, 24, 25, 26, 35, 38, 41, 47, 55, 56, 59], "06": [9, 15, 21, 22, 24, 31, 36, 38, 42, 49], "info": [9, 15, 21, 22, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47, 49], "264": [9, 21, 26], "posixpath": [9, 15, 21, 24, 25, 26, 35, 38], "lang_dir": [9, 15, 21, 35, 38], "lang_phon": [9, 11, 15, 21, 36, 42, 46, 47, 49], "feature_dim": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38, 49], "search_beam": [9, 15, 21, 33, 38, 49], "20": [9, 14, 15, 21, 22, 24, 26, 31, 33, 35, 36, 38, 41, 42, 46, 47, 49, 51, 56], "output_beam": [9, 15, 21, 33, 38, 49], "min_active_st": [9, 15, 21, 33, 38, 49], "30": [9, 10, 15, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 43, 44, 49, 55, 56, 57], "max_active_st": [9, 15, 21, 33, 38, 49], "10000": [9, 15, 21, 33, 38, 49], "use_double_scor": [9, 15, 21, 33, 38, 49], "14": [9, 10, 15, 21, 22, 24, 25, 28, 33, 38, 41, 42, 43, 46, 55, 56, 57], "feature_dir": [9, 15, 21, 38], "fbank": [9, 11, 15, 21, 22, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47, 49], "max_dur": [9, 15, 21, 38], "bucketing_sampl": [9, 15, 21, 38], "num_bucket": [9, 15, 21, 38], "concatenate_cut": [9, 15, 21, 38], "duration_factor": [9, 15, 21, 38], "gap": [9, 15, 21, 38], "on_the_fly_feat": [9, 15, 21, 38], "shuffl": [9, 15, 21, 38], "return_cut": [9, 15, 21, 38], "num_work": [9, 15, 21, 38], "env_info": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38], "sha1": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38], "4c05309499a08454997adf500b56dcc629e35ae5": [9, 21], "date": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38], "tue": [9, 21, 24, 38], "jul": [9, 15, 21], "25": [9, 15, 21, 22, 24, 25, 33, 38, 41, 46, 47, 49, 56], "36": [9, 21, 24, 35, 38, 39], "dev": [9, 10, 15, 21, 22, 24, 25, 26, 31, 33, 35, 
36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "7640d663": 9, "branch": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38, 43], "375520d": 9, "fri": [9, 22], "28": [9, 21, 24, 25, 35, 38, 42, 59], "07": [9, 21, 24, 25, 26, 33, 35, 36, 38], "hostnam": [9, 15, 21, 22, 24, 25, 26, 35], "ip": [9, 15, 21, 22, 24, 25, 26, 35], "172": 9, "17": [9, 21, 22, 24, 25, 26, 33, 38, 46, 47, 55], "401": 9, "lexicon": [9, 11, 15, 21, 33, 35, 36, 38, 39, 41, 43, 44, 49, 55, 56, 57], "168": [9, 15, 21, 42], "compil": [9, 15, 21, 24, 25, 33, 35, 38], "linv": [9, 11, 15, 21, 35, 38, 49], "403": [9, 42], "273": [9, 15, 21, 22, 35], "devic": [9, 15, 21, 22, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 56, 57], "406": [9, 38], "291": [9, 21], "424": 9, "218": [9, 15, 21, 25], "about": [9, 11, 12, 14, 15, 16, 21, 24, 25, 26, 35, 39, 41, 44, 55, 56, 57], "cut": [9, 15, 21, 38], "425": [9, 25, 38], "252": [9, 21], "504": 9, "204": [9, 21, 26, 38], "batch": [9, 15, 21, 24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 51, 55, 56, 57], "process": [9, 15, 21, 22, 24, 25, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "until": [9, 15, 21, 38, 43], "w": [9, 21, 38, 46, 47], "nnpack": 9, "cpp": [9, 24, 28], "53": [9, 15, 21, 26, 33, 41, 42, 47, 55, 56], "could": [9, 24, 25, 26, 31, 32, 33, 36, 51], "reason": [9, 14, 22, 24, 25, 26, 31, 56], "unsupport": 9, "hardwar": 9, "687": 9, "241": [9, 21, 33], "transcript": [9, 15, 21, 33, 34, 35, 36, 38, 41, 42, 46, 47, 55, 56, 57], "store": [9, 11, 15, 21, 38, 51], "recog": [9, 15, 21, 35, 38], "test_set": [9, 15, 21, 49], "688": 9, "564": [9, 15, 21], "240": [9, 15, 21, 33, 49], "ins": [9, 15, 21, 38, 49], "del": [9, 15, 21, 38, 49], "sub": [9, 15, 21, 38, 49], "690": 9, "249": [9, 21, 25], "wrote": [9, 15, 21, 38], "detail": [9, 11, 15, 21, 23, 27, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "stat": [9, 15, 21, 38], "err": [9, 15, 21, 35, 38], "316": [9, 21, 38], "congratul": [9, 13, 21, 24, 25, 26, 33, 36, 38, 42, 46, 47, 49], 
"finish": [9, 14, 33, 35, 36, 38, 39, 41, 42, 46, 47, 49, 56, 57], "successfulli": [9, 13, 21, 24, 25, 26], "collect": [10, 13, 21, 51], "post": 10, "correspond": [10, 18, 19], "solut": 10, "One": 10, "torchaudio": [10, 13, 20, 53], "cu111": 10, "torchvis": 10, "f": [10, 13, 15, 21, 46, 47], "org": [10, 13, 21, 34, 35, 41, 51, 55, 56, 57], "whl": [10, 13, 21], "torch_stabl": [10, 13, 21], "throw": [10, 24, 25, 26], "while": [10, 16, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 43, 44, 51, 55, 56, 57], "That": [10, 11, 14, 15, 16, 24, 25, 39, 41, 55, 56, 57], "cu11": 10, "correct": 10, "traceback": 10, "most": [10, 56, 57], "recent": [10, 24, 25, 26], "last": 10, "yesnoasrdatamodul": 10, "home": [10, 24, 25, 33, 38], "xxx": [10, 22, 24, 25, 26], "next": [10, 13, 14, 19, 21, 24, 25, 26, 38, 39, 41, 42, 43, 44, 51, 55, 56, 57], "gen": [10, 13, 14, 19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "kaldi": [10, 11, 13, 14, 19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "34": [10, 24, 25], "datamodul": 10, "add_eo": 10, "add_so": 10, "get_text": 10, "39": [10, 21, 24, 26, 35, 38, 42, 46], "tensorboard": [10, 16, 21, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "summarywrit": 10, "miniconda3": 10, "env": 10, "yyi": 10, "loosevers": 10, "uninstal": 10, "setuptool": [10, 13, 21], "yangyifan": 10, "anaconda3": 10, "dev20230112": 10, "linux": [10, 13, 14, 19, 21, 23, 24, 25, 26, 27], "x86_64": [10, 21, 24], "egg": 10, "handl": [10, 33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "except": [10, 22], "anoth": 10, "occur": 10, "pruned_transducer_stateless7_ctc_b": [10, 43], "104": [10, 15, 21], "rais": 10, "anaconda": 10, "maco": [10, 13, 14, 19, 23, 24, 25, 26, 27], "probabl": [10, 35, 41, 43, 55, 56, 57], "variabl": [10, 12, 13, 16, 21, 24, 25, 26, 33, 36, 38, 39, 41, 43, 44, 55, 56, 57], "dyld_library_path": 10, "conda_prefix": 10, "locat": [10, 16, 24], "libpython": 10, "abl": 10, "insid": [10, 29], "codna_prefix": 10, "ld_library_path": 10, "setup": [11, 
14, 20, 21, 24, 31, 33, 35, 36, 38, 39, 41, 42, 46, 47, 49, 56, 57, 59, 60], "everyth": [11, 23], "tmp": [11, 12, 13, 15, 16, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "each": [11, 15, 22, 24, 25, 27, 33, 35, 36, 38, 41, 43, 44, 51, 53, 55, 56, 57], "exist": 11, "anyth": [11, 17, 19], "els": 11, "wonder": [11, 15], "url": [11, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "varieti": 11, "folder": [11, 21, 22, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "wav": [11, 15, 22, 24, 25, 26, 27, 29, 33, 35, 36, 38, 41, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "scp": 11, "feat": 11, "put": [11, 13, 21, 24, 25, 43, 56], "l": [11, 21, 24, 25, 26, 35, 46, 47, 49], "waves_yesno": [11, 15, 21], "tar": [11, 21], "gz": [11, 21, 51], "l41": 11, "extract": [11, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "yesno_cuts_test": 11, "jsonl": [11, 22], "yesno_cuts_train": 11, "yesno_feats_test": 11, "lca": 11, "yesno_feats_train": 11, "hlg": [11, 15, 21, 42, 46, 47, 49], "l_disambig": [11, 49], "lexicon_disambig": [11, 49], "manifest": [11, 21, 31, 39], "yesno_recordings_test": 11, "yesno_recordings_train": 11, "yesno_supervisions_test": 11, "yesno_supervisions_train": 11, "18": [11, 21, 24, 25, 26, 33, 35, 36, 38, 41, 42, 46, 47, 55, 56, 57], "thei": [11, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "idea": [11, 15, 53], "examin": 11, "relat": [11, 22, 31, 33, 35, 38, 42, 46, 47, 49, 59, 60], "gunzip": 11, "c": [11, 21, 35, 36, 41, 43, 44, 49, 55, 56, 57], "head": [11, 21, 35, 53], "output": [11, 12, 13, 15, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "id": [11, 33, 36, 38, 42, 46, 47], "0_0_0_0_1_1_1_1": 11, "channel": [11, 19, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "sampling_r": 11, "num_sampl": 11, "50800": 11, "35": [11, 21, 22, 24, 25, 26, 35, 38, 55], "channel_id": 11, "0_0_0_1_0_1_1_0": 11, "48880": 11, "0_0_1_0_0_1_1_0": 11, "48160": 11, "audio": [11, 21, 46, 47], 
"l300": 11, "mean": [11, 14, 15, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "field": [11, 34], "per": [11, 35, 41, 56, 57], "recording_id": 11, "NO": [11, 15, 49], "ye": [11, 15, 49], "hebrew": [11, 49], "supervis": [11, 20, 32, 61], "l510": 11, "furthermor": [11, 35], "featur": [11, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "compress": [11, 21], "lilcom": [11, 21], "cutset": [11, 31], "recordingset": 11, "supervisionset": 11, "featureset": 11, "num_fram": 11, "635": 11, "num_featur": 11, "frame_shift": 11, "storage_typ": 11, "lilcom_chunki": 11, "storage_path": 11, "storage_kei": 11, "13000": 11, "3570": 11, "record": [11, 19, 25, 26, 33, 34, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "monocut": 11, "611": 11, "16570": 11, "12964": 11, "2929": 11, "602": 11, "32463": 11, "12936": 11, "2696": 11, "actual": [11, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "separ": [11, 27, 51], "lang": [11, 21, 22, 35, 38, 44], "quit": [12, 14, 16, 32, 33, 35, 36, 38, 41, 43, 44, 51, 55, 56, 57], "cuda_visible_devic": [12, 16, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "usag": [12, 15, 16, 22, 24, 25, 26, 28, 29, 42, 46, 47, 49], "one": [12, 19, 22, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "tini": [13, 14], "well": [13, 22, 49, 61], "hundr": 13, "thousand": 13, "virtualenv": [13, 21], "icefall_env": [13, 15], "interpret": 13, "usr": 13, "prefix": [13, 22], "pkg_resourc": 13, "wheel": [13, 21, 24], "remeb": 13, "continu": [13, 15, 24, 25, 26, 27, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "caution": [13, 33, 38], "matter": [13, 21, 24], "torchaduio": 13, "from_wheel": [13, 15, 21], "dev20231220": 13, "anytim": 13, "modulenotfounderror": 13, "don": [13, 14, 15, 21, 24, 25, 26, 28, 31, 33, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "walk": 14, "recognit": [14, 19, 20, 23, 24, 25, 32, 34, 35, 49, 61], "system": [14, 51], "out": [14, 39, 51], 
"minut": [14, 51], "sequenti": 14, "part": [14, 15, 19, 21, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59, 60], "window": [14, 19, 23, 24, 25, 26, 27], "commun": 14, "appreci": 14, "virtual": 14, "curiou": 14, "quick": 14, "state_dict": [14, 20, 30, 33, 35, 36, 38, 42, 46, 47, 49], "jit": [14, 20, 23, 30, 38], "onnx": [14, 20, 22, 30, 59, 60], "torchscript": [15, 23, 28, 29, 30], "trace": [15, 20, 23, 28, 30], "explain": 15, "kind": [15, 38, 41, 43, 44, 55, 56, 57], "produc": [15, 23, 41, 43, 44, 55, 56, 57], "03": [15, 21, 22, 25, 35, 38, 46, 47, 55], "912": [15, 22], "76": [15, 21, 49], "lr": [15, 21, 31, 35, 55], "weight_decai": [15, 21], "1e": [15, 21], "start_epoch": [15, 21], "best_train_loss": [15, 21, 22, 24, 25, 26], "inf": [15, 21, 22, 24, 25, 26], "best_valid_loss": [15, 21, 22, 24, 25, 26], "best_train_epoch": [15, 21, 22, 24, 25, 26], "best_valid_epoch": [15, 21, 22, 25, 26], "batch_idx_train": [15, 21, 22, 24, 25, 26], "log_interv": [15, 21, 22, 24, 25, 26], "reset_interv": [15, 21, 22, 24, 25, 26], "valid_interv": [15, 21, 22, 24, 25, 26], "beam_siz": [15, 21, 22, 35], "sum": [15, 21], "913": 15, "950": 15, "971": [15, 47], "106": [15, 21, 25, 38], "Not": 15, "974": 15, "111": [15, 21, 38], "kei": [15, 24, 25, 26, 38], "bia": 15, "running_mean": 15, "running_var": 15, "num_batches_track": 15, "output_linear": 15, "48": [15, 21, 24, 25, 33, 35], "089": 15, "090": 15, "ad79f1c699c684de9785ed6ca5edb805a41f78c3": 15, "wed": [15, 21, 24, 33, 35, 38], "26": [15, 21, 24, 25, 26, 35, 38, 47], "09": [15, 22, 25, 33, 35, 36, 38, 55], "aa073f6": 15, "none": [15, 21, 33, 38], "9a47c08": 15, "mon": [15, 25, 26], "aug": [15, 39], "50": [15, 21, 22, 24, 25, 26, 38, 41, 46, 55, 56, 57], "privat": 15, "fangjun": [15, 21, 22, 24, 25, 26, 35, 38], "macbook": 15, "pro": [15, 33, 38], "127": [15, 21, 24, 25, 49], "092": 15, "103": 15, "272": 15, "109": [15, 21, 33, 38], "112": [15, 24, 25, 26], "115": [15, 24, 25, 33, 38], "253": 15, "386": 15, "556": 
15, "557": 15, "558": 15, "248": [15, 35], "559": 15, "315": [15, 24, 33, 35, 36, 38, 42], "ident": 15, "kaldifeat": 15, "csukuangfj": [15, 21, 22, 24, 25, 27, 33, 35, 36, 38, 42, 46, 47, 49, 55], "dev20231221": 15, "0_0_0_1_0_0_0_1": [15, 49], "0_0_1_0_0_0_1_0": [15, 49], "19": [15, 22, 24, 25, 26, 31, 33, 38, 42, 46, 47], "208": [15, 38], "136": [15, 38], "num_class": [15, 33, 38, 49], "sample_r": [15, 22, 33, 35, 38, 49], "words_fil": [15, 33, 38, 49], "sound_fil": [15, 22, 33, 35, 38, 49], "142": [15, 24, 33, 36, 38], "144": [15, 38], "212": 15, "213": [15, 49], "construct": [15, 22, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47, 49], "170": [15, 42], "sound": [15, 22, 24, 25, 26, 29, 30, 33, 35, 36, 38, 42, 46, 47, 49], "224": 15, "176": [15, 24, 35, 38], "304": [15, 25], "214": [15, 35, 38], "47": [15, 21, 24, 25, 26, 31, 33, 38], "44": [15, 21, 24, 25, 38, 46, 47], "666": 15, "667": 15, "670": 15, "677": [15, 24], "100": [15, 21, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "843": 15, "cpu_jit": [15, 28, 33, 38, 41, 43, 44, 56, 57], "confus": [15, 28], "move": [15, 28, 41, 43, 44, 56, 57], "map_loc": 15, "resid": 15, "default": [15, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "jit_pretrain": [15, 29, 43, 44, 55], "nn": [15, 35, 41, 43, 44, 55, 56, 57], "56": [15, 21, 24, 25, 38, 46], "00": [15, 21, 24, 33, 35, 36, 38, 42, 46, 47, 49], "603": 15, "121": [15, 42], "nn_model": [15, 33, 38], "129": [15, 36], "640": [15, 21, 26], "134": [15, 33], "641": 15, "138": [15, 33, 35], "148": 15, "642": 15, "154": [15, 36], "727": 15, "190": [15, 42], "192": [15, 26, 38], "export_onnx": 15, "onnxruntim": [15, 27], "888": [15, 33], "83": [15, 38, 42], "892": 15, "diagnost": 15, "verbos": 15, "warn": 15, "21": [15, 21, 22, 24, 33, 35, 38, 46, 47], "047": [15, 35], "meta_data": 15, "model_typ": 15, "model_author": 15, "comment": 15, "non": [15, 20, 38, 53, 56, 61], "vocab_s": [15, 22, 24, 25, 26, 35], "049": 15, "140": [15, 21, 36], "int8": [15, 23, 
30, 59, 60], "quantiz": [15, 23, 30, 39], "075": 15, "onnx_quant": 15, "538": [15, 38], "tensor": [15, 21, 25, 26, 33, 35, 36, 38, 41, 49, 55, 56], "transpose_1_output_0": 15, "081": 15, "151": [15, 24], "float32": [15, 24, 25, 26], "onnx_pretrain": [15, 27], "260": [15, 26, 38], "166": 15, "171": [15, 21, 36, 38, 46, 47], "173": 15, "267": [15, 25, 35, 46, 47], "270": 15, "180": [15, 25, 33, 38], "279": [15, 38], "196": 15, "318": [15, 24, 25], "232": 15, "234": [15, 38], "deploi": [15, 27, 33, 38], "sherpa": [15, 19, 23, 28, 29, 30, 55], "framework": [15, 19, 41, 56], "_": [15, 39], "ncnn": [15, 20, 30], "youtub": [17, 20, 38, 39, 41, 42, 43, 44, 55, 56, 57], "video": [17, 20, 38, 39, 41, 42, 43, 44, 55, 56, 57], "upload": [18, 19, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "specif": [18, 27, 35], "aishel": [18, 20, 33, 35, 36, 37, 61], "wenetspeech": [18, 28], "ipad": 19, "phone": 19, "screenshot": [19, 33, 35, 36, 38, 39, 41, 49, 55, 56], "chines": [19, 34, 35], "english": [19, 31, 49, 55], "greedi": 19, "click": [19, 21, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "button": 19, "submit": 19, "wait": 19, "moment": 19, "bottom": [19, 41, 43, 44, 55, 56, 57], "subscrib": [19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "nadira": [19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "povei": [19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "www": [19, 21, 34, 38, 39, 41, 42, 43, 44, 51, 55, 56, 57], "uc_vaumpkminz1pnkfxan9mw": [19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "dummi": [20, 38], "toolkit": 20, "cudnn": 20, "docker": [20, 21], "frequent": 20, "ask": 20, "question": 20, "faq": 20, "oserror": 20, "libtorch_hip": 20, "attributeerror": 20, "distutil": 20, "attribut": [20, 26, 38], "libpython3": 20, "timit": [20, 37, 46, 47, 61], "tt": [20, 59, 60, 61], "vit": [20, 58, 61], "ljspeech": [20, 58, 61], "vctk": [20, 58, 61], "fine": [20, 39, 61], "finetun": [20, 32, 61], "zipform": [20, 23, 27, 30, 32, 37, 40, 51, 52, 54, 61], "contribut": 20, "support": [21, 
23, 24, 25, 26, 33, 35, 38, 41, 43, 44, 53, 55, 56, 57, 59, 60], "guid": 21, "suggest": [21, 31, 41, 43, 44, 55, 56, 57], "strongli": 21, "point": [21, 22, 33, 36, 38, 39, 41, 43, 44, 55, 56, 57], "sever": [21, 22, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "just": [21, 24, 25, 26, 51, 53], "kuangfangjun": [21, 24, 25, 26], "cpython3": 21, "final": [21, 22, 24, 25, 38, 42], "64": [21, 22, 24, 35, 56], "9422m": 21, "creator": 21, "cpython3posix": 21, "dest": 21, "star": [21, 24, 25, 26], "fj": [21, 22, 24, 25, 26, 35, 38], "clear": 21, "no_vcs_ignor": 21, "global": 21, "seeder": 21, "fromappdata": 21, "bundl": 21, "app_data_dir": 21, "ad": [21, 24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 49, 53, 55, 56, 57], "seed": 21, "bashactiv": 21, "cshellactiv": 21, "fishactiv": 21, "nushellactiv": 21, "powershellactiv": 21, "pythonactiv": 21, "determin": 21, "nvidia": [21, 33, 35, 36, 38], "smi": 21, "49": [21, 24, 25, 38, 47, 49], "510": 21, "driver": 21, "greater": 21, "our": [21, 24, 25, 26, 28, 29, 38, 39, 41, 53, 56, 57], "case": [21, 22, 24, 25, 26, 32, 41, 43, 44, 55, 56, 57], "verifi": 21, "nvcc": 21, "copyright": 21, "2005": 21, "2019": 21, "corpor": 21, "wed_oct_23_19": 21, "38_pdt_2019": 21, "v10": 21, "89": [21, 33], "cu116": 21, "compat": 21, "stabl": 21, "matrix": 21, "2bcu116": 21, "cp38": 21, "linux_x86_64": 21, "1983": 21, "mb": [21, 24, 25, 26], "________________________________________": 21, "gb": [21, 35], "764": 21, "kb": [21, 24, 25, 26, 46, 47], "eta": 21, "satisfi": 21, "extens": 21, "__version__": 21, "dev20230725": 21, "pypi": 21, "tuna": 21, "tsinghua": 21, "edu": 21, "cn": 21, "resolv": 21, "main": [21, 33, 38, 53], "ubuntu": [21, 24, 25, 26], "2bcuda11": 21, "manylinux_2_17_x86_64": 21, "manylinux2014_x86_64": 21, "graphviz": 21, "cach": [21, 26], "de": [21, 22, 24, 25, 26, 35], "5e": 21, "fcbb22c68208d39edff467809d06c9d81d7d27426460ebc598e55130c1aa": 21, "o": 21, "cento": 21, "2009": 21, "core": 21, "cmake": [21, 24, 25, 33, 
38], "27": [21, 24, 25, 26, 31, 33, 35, 42, 47], "gcc": 21, "cmake_cuda_flag": 21, "wno": 21, "deprec": [21, 35], "lineinfo": 21, "expt": 21, "extend": 21, "lambda": 21, "use_fast_math": 21, "xptxa": 21, "gencod": 21, "arch": 21, "compute_35": 21, "sm_35": 21, "compute_50": 21, "sm_50": 21, "compute_60": 21, "sm_60": 21, "compute_61": 21, "sm_61": 21, "compute_70": 21, "sm_70": 21, "compute_75": 21, "sm_75": 21, "compute_80": 21, "sm_80": 21, "compute_86": 21, "sm_86": 21, "donnx_namespac": 21, "onnx_c2": 21, "compute_52": 21, "sm_52": 21, "xcudaf": 21, "diag_suppress": 21, "cc_clobber_ignor": 21, "integer_sign_chang": 21, "useless_using_declar": 21, "set_but_not_us": 21, "field_without_dll_interfac": 21, "base_class_has_different_dll_interfac": 21, "dll_interface_conflict_none_assum": 21, "dll_interface_conflict_dllexport_assum": 21, "implicit_return_from_non_void_funct": 21, "unsigned_compare_with_zero": 21, "declared_but_not_referenc": 21, "bad_friend_decl": 21, "relax": 21, "constexpr": 21, "d_glibcxx_use_cxx11_abi": 21, "option": [21, 23, 27, 30, 35, 39, 42, 46, 47, 49], "wall": 21, "strict": [21, 26, 34], "overflow": 21, "unknown": 21, "pragma": 21, "cmake_cxx_flag": 21, "unus": 21, "nvtx": 21, "disabl": [21, 22, 24, 25], "debug": 21, "sync": 21, "kernel": [21, 24, 26, 35], "memori": [21, 24, 33, 35, 38, 53], "alloc": 21, "214748364800": 21, "byte": [21, 24, 25, 26], "200": [21, 22, 24, 25, 26, 33, 38, 39, 46, 47, 49], "abort": 21, "__file__": 21, "cpython": [21, 24], "gnu": [21, 24], "req": 21, "vq12fd5i": 21, "filter": 21, "quiet": [21, 34], "7640d663469b22cd0b36f3246ee9b849cd25e3b7": 21, "metadata": [21, 46, 47], "pyproject": 21, "toml": 21, "cytoolz": 21, "3b": 21, "a7828d575aa17fb7acaf1ced49a3655aa36dad7e16eb7e6a2e4df0dda76f": 21, "33": [21, 24, 25, 33, 34, 35, 38, 46], "pyyaml": 21, "c8": 21, "6b": 21, "6600ac24725c7388255b2f5add93f91e58a5d7efaf4af244fdbcc11a541b": 21, "ma": 21, "nylinux_2_17_x86_64": 21, "736": 21, "dataclass": 21, "2f": 21, 
"1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d": 21, "dev0": 21, "7640d66": 21, "a8": 21, "df0a69c52bd085ca1ad4e5c4c1a5c680e25f9477d8e49316c4ff1e5084a4": 21, "linux_2_17_x86_64": 21, "87": [21, 24], "tqdm": 21, "e6": 21, "a2cff6306177ae6bc73bc0665065de51dfb3b9db7373e122e2735faf0d97": 21, "numpi": 21, "audioread": 21, "5d": 21, "cb": 21, "82a002441902dccbe427406785db07af10182245ee639ea9f4d92907c923": 21, "377": 21, "tabul": 21, "40": [21, 24, 25, 26, 36, 38, 42, 46, 47], "4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854": 21, "1a": 21, "70": 21, "e63223f8116931d365993d4a6b7ef653a4d920b41d03de7c59499962821f": 21, "97": [21, 24, 33], "ab": [21, 41, 55, 56, 57], "c3": 21, "57f0601a2d4fe15de7a553c00adbc901425661bf048f2a22dfc500caf121": 21, "intervaltre": 21, "fb": 21, "396d568039d21344639db96d940d40eb62befe704ef849b27949ded5c3bb": 21, "soundfil": 21, "bd": 21, "0602167a213d9184fc688b1086dc6d374b7ae8c33eccf169f9b50ce6568c": 21, "py2": 21, "46": [21, 25, 33, 38], "toolz": 21, "7f": 21, "5c": 21, "922a3508f5bda2892be3df86c74f9cf1e01217c2b1f8a0ac4841d903e3e9": 21, "55": [21, 24, 36, 38, 46], "sortedcontain": 21, "9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621c": 21, "cffi": 21, "b7": 21, "8b": 21, "06f30caa03b5b3ac006de4f93478dbd0239e2a16566d81a106c322dc4f79": 21, "15": [21, 22, 24, 25, 26, 31, 35, 36, 38, 46, 49, 51], "442": 21, "pycpars": 21, "d5": 21, "5f610ebe421e85889f2e55e33b7f9a6795bd982198517d912eb1c76e1a53": 21, "118": [21, 38], "filenam": [21, 24, 25, 26, 27, 28, 29, 43, 44, 55, 57, 59, 60], "size": [21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "687627": 21, "sha256": 21, "cbf0a4d2d0b639b33b91637a4175bc251d6a021a069644ecb1a9f2b3a83d072a": 21, "ephem": 21, "wwtk90_m": 21, "7a": 21, "8e": 21, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 21, "23704": 21, "5e2d3537c96ce9cf0f645a654c671163707bf8cb8d9e358d0e2b0939a85ff4c2": 21, "9c": 21, 
"f19ae5a03f8862d9f0776b0c0570f1fdd60a119d90954e3f39": 21, "26098": 21, "2604170976cfffe0d2f678cb1a6e5b525f561cd50babe53d631a186734fec9f9": 21, "f3": 21, "ed": 21, "2b": 21, "c179ebfad4e15452d6baef59737f27beb9bfb442e0620f7271": 21, "remot": 21, "enumer": 21, "12942": 21, "count": 21, "total": [21, 25, 26, 33, 35, 36, 38, 39, 41, 42, 49, 55, 56], "delta": 21, "reus": 21, "pack": [21, 51, 56, 57], "12875": 21, "receiv": 21, "mib": 21, "8835": 21, "41": [21, 24, 26, 33, 35, 46, 49], "dl_dir": [21, 33, 36, 38, 39, 41, 43, 44, 55, 56, 57], "___________________________________________________": 21, "70m": 21, "1mb": 21, "718": 21, "compute_fbank_yesno": 21, "_______________________________________________________________________________": 21, "90": [21, 24], "82it": 21, "778": 21, "______________________________________________________________________________": 21, "256": [21, 26, 46, 47], "92it": 21, "51": [21, 24, 33, 38, 49], "66": [21, 25, 31], "project": 21, "kaldilm": 21, "csrc": [21, 38], "arpa_file_pars": 21, "cc": 21, "void": 21, "arpafilepars": 21, "std": 21, "istream": 21, "79": 21, "92": [21, 38], "275": [21, 33], "compile_hlg": 21, "124": [21, 33, 38], "276": 21, "convert": [21, 24, 25, 26, 38], "309": 21, "ctc_topo": 21, "max_token_id": 21, "310": 21, "314": 21, "intersect": [21, 41, 56, 57], "323": 21, "lg": [21, 41, 44, 56, 57], "shape": [21, 26], "connect": [21, 22, 38, 41, 42, 55, 56, 57], "68": [21, 38], "class": [21, 38], "71": [21, 38, 42], "341": 21, "rag": 21, "raggedtensor": 21, "remov": [21, 33, 35, 36, 38, 42, 46, 47], "disambigu": 21, "354": 21, "91": 21, "remove_epsilon": 21, "445": 21, "arc": 21, "compos": 21, "h": 21, "446": 21, "447": 21, "segment": 21, "fault": 21, "dump": 21, "protocol_buffers_python_implement": 21, "674": 21, "interest": [21, 39, 41, 43, 44, 55, 56, 57], "936": 21, "481": 21, "482": 21, "world_siz": [21, 39], "master_port": 21, "12354": 21, "num_epoch": 21, "3fb0a43": 21, "thu": [21, 22, 24, 25, 26, 35, 38, 42], "05": 
[21, 22, 24, 25, 31, 33, 35, 36, 38, 47, 51, 60], "74279": [21, 22, 24, 25, 26, 35], "1220091118": 21, "57c4d55446": 21, "sph26": 21, "941": 21, "949": 21, "495": 21, "965": [21, 33], "146": 21, "244": 21, "967": 21, "149": [21, 24, 38], "199": [21, 38, 42], "singlecutsampl": 21, "205": [21, 38], "968": 21, "565": [21, 38], "422": 21, "loss": [21, 24, 25, 33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "065": 21, "over": [21, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "2436": 21, "frame": [21, 35, 41, 43, 56, 57], "tot_loss": 21, "681": [21, 24], "4561": 21, "2828": 21, "7076": 21, "22192": 21, "54": [21, 25, 26, 38, 42, 46, 47], "167": 21, "444": 21, "9002": 21, "18067": 21, "011": 21, "2555": 21, "2695": 21, "484": 21, "34971": 21, "331": [21, 24, 25, 38, 42], "4688": 21, "368": 21, "75": [21, 24], "633": 21, "2532": 21, "242": [21, 33, 38], "1139": 21, "1592": 21, "522": [21, 38], "1627": 21, "209": [21, 42], "07055": 21, "1175": 21, "07091": 21, "847": 21, "07731": 21, "427": [21, 25, 38], "04391": 21, "05341": 21, "884": 21, "04384": 21, "387": [21, 47], "03458": 21, "04616": 21, "707": [21, 33, 38], "03379": 21, "758": [21, 38], "433": [21, 38], "01054": 21, "980": [21, 38], "009014": 21, "009974": 21, "489": [21, 33], "01085": 21, "258": [21, 46, 47], "01172": 21, "01055": 21, "621": [21, 49], "01074": 21, "699": 21, "866": 21, "01044": 21, "844": 21, "008942": 21, "221": [21, 38], "01082": 21, "970": [21, 38], "01169": 21, "247": 21, "01073": 21, "326": [21, 25], "555": 21, "840": 21, "841": 21, "855": 21, "868": 21, "882": 21, "883": 21, "157": 21, "701": 21, "702": [21, 38], "704": [21, 33, 46], "fun": [21, 24, 25], "variou": [21, 27, 30, 61], "period": [22, 24], "disk": 22, "optim": [22, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "resum": [22, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "strip": 22, "reduc": [22, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "pruned_transducer_stateless3": [22, 28, 53], 
"almost": [22, 41, 53, 56, 57], "dict": [22, 26], "stateless3": [22, 24], "repo": [22, 27], "those": 22, "wave": [22, 24, 25, 26, 33, 38], "iter": [22, 24, 25, 26, 29, 41, 43, 44, 55, 56, 57], "1224000": 22, "greedy_search": [22, 31, 35, 41, 43, 55, 56, 57], "test_wav": [22, 24, 25, 26, 27, 33, 35, 36, 38, 42, 46, 47, 49], "1089": [22, 24, 25, 26, 27, 38, 42], "134686": [22, 24, 25, 26, 27, 38, 42], "0001": [22, 24, 25, 26, 27, 38, 42], "1221": [22, 24, 25, 38, 42], "135766": [22, 24, 25, 38, 42], "0002": [22, 24, 25, 38, 42], "multipl": [22, 33, 35, 36, 38, 42, 46, 47, 49], "Its": [22, 24, 25, 26, 38], "233": [22, 24, 25], "265": 22, "3000": [22, 24, 25, 26], "80": [22, 24, 25, 26, 33, 35, 38], "subsampling_factor": [22, 25, 26, 33, 35, 38], "encoder_dim": [22, 24, 25, 26], "512": [22, 24, 25, 26, 33, 35, 38], "nhead": [22, 24, 26, 33, 35, 38, 41, 56], "dim_feedforward": [22, 24, 25, 35], "num_encoder_lay": [22, 24, 25, 26, 35], "decoder_dim": [22, 24, 25, 26], "joiner_dim": [22, 24, 25, 26], "model_warm_step": [22, 24, 25], "4810e00d8738f1a21278b0156a42ff396a2d40ac": 22, "oct": [22, 38], "miss": [22, 24, 25, 26, 35, 38], "cu102": [22, 24, 25, 26], "1013": 22, "c39cba5": 22, "dirti": [22, 24, 25, 33, 38], "ceph": [22, 33, 35, 38], "0324160024": 22, "65bfd8b584": 22, "jjlbn": 22, "bpe_model": [22, 24, 25, 26, 38], "16000": [22, 33, 35, 36, 38, 42, 43, 46, 47], "max_context": 22, "max_stat": 22, "context_s": [22, 24, 25, 26, 35], "max_sym_per_fram": [22, 35], "simulate_stream": 22, "decode_chunk_s": 22, "left_context": 22, "dynamic_chunk_train": 22, "causal_convolut": 22, "short_chunk_s": [22, 26, 56, 57], "num_left_chunk": [22, 26], "blank_id": [22, 24, 25, 26, 35], "unk_id": 22, "271": [22, 25], "612": 22, "458": 22, "giga": [22, 25, 55], "623": 22, "277": 22, "78648040": 22, "951": [22, 38], "285": [22, 35, 38], "952": 22, "295": [22, 33, 35, 36, 38], "957": 22, "301": [22, 38], "700": 22, "329": [22, 25, 38], "388": 22, "earli": [22, 24, 25, 26, 38, 42], 
"nightfal": [22, 24, 25, 26, 38, 42], "THE": [22, 24, 25, 26, 38, 42], "yellow": [22, 24, 25, 26, 38, 42], "lamp": [22, 24, 25, 26, 38, 42], "light": [22, 24, 25, 26, 38, 42], "AND": [22, 24, 25, 26, 38, 42], "THERE": [22, 24, 25, 26, 38, 42], "squalid": [22, 24, 25, 26, 38, 42], "quarter": [22, 24, 25, 26, 38, 42], "OF": [22, 24, 25, 26, 38, 42], "brothel": [22, 24, 25, 26, 38, 42], "god": [22, 38, 42], "AS": [22, 38, 42], "direct": [22, 38, 42], "consequ": [22, 38, 42], "sin": [22, 38, 42], "man": [22, 38, 42], "punish": [22, 38, 42], "had": [22, 38, 42], "her": [22, 38, 42], "love": [22, 38, 42], "child": [22, 38, 42], "whose": [22, 35, 38, 42], "ON": [22, 24, 38, 42], "THAT": [22, 38, 42], "dishonor": [22, 38, 42], "bosom": [22, 38, 42], "TO": [22, 38, 42], "parent": [22, 38, 42], "forev": [22, 38, 42], "WITH": [22, 38, 42], "race": [22, 38, 42], "descent": [22, 38, 42], "mortal": [22, 38, 42], "BE": [22, 38, 42], "bless": [22, 38, 42], "soul": [22, 38, 42], "IN": [22, 38, 42], "heaven": [22, 38, 42], "yet": [22, 24, 25, 38, 42], "THESE": [22, 38, 42], "thought": [22, 38, 42], "affect": [22, 38, 42], "hester": [22, 38, 42], "prynn": [22, 38, 42], "hope": [22, 34, 38, 42], "apprehens": [22, 38, 42], "390": 22, "down": [22, 33, 38, 41, 43, 44, 55, 56, 57], "reproduc": [22, 38], "9999": [22, 43, 44, 55], "symlink": 22, "pass": [22, 26, 33, 35, 36, 38, 41, 43, 44, 53, 55, 56, 57], "convemform": [23, 30, 53], "platform": [23, 27], "android": [23, 24, 25, 26, 27], "raspberri": [23, 27], "pi": [23, 27], "\u7231\u82af\u6d3e": 23, "maix": 23, "iii": 23, "axera": 23, "rv1126": 23, "static": 23, "binari": [23, 24, 25, 26, 33, 35, 36, 38, 41, 49, 55, 56], "pnnx": [23, 30], "encod": [23, 27, 29, 30, 33, 35, 36, 38, 41, 42, 43, 49, 53, 55, 56, 57], "conv": [24, 25], "emform": [24, 25, 28], "stateless2": [24, 25, 55], "pretrained_model": [24, 25, 26], "online_transduc": 24, "jit_xxx": [24, 25, 26], "anywher": [24, 25], "submodul": 24, "recurs": 24, "init": 24, 
"dcmake_build_typ": [24, 33, 38], "dncnn_python": 24, "dncnn_build_benchmark": 24, "dncnn_build_exampl": 24, "dncnn_build_tool": 24, "j4": 24, "pwd": 24, "src": [24, 26], "compon": [24, 53], "ncnn2int8": [24, 25], "am": 24, "sai": [24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "later": [24, 25, 26, 33, 36, 38, 41, 42, 43, 44, 46, 47, 55, 56, 57], "termin": 24, "tencent": [24, 25], "modif": [24, 35], "offici": 24, "synchron": 24, "renam": [24, 25, 26], "conv_emformer_transducer_stateless2": [24, 53], "length": [24, 26, 35, 51, 56, 57], "cnn": [24, 26], "31": [24, 25, 26, 38], "context": [24, 35, 41, 53, 55, 56, 57], "configur": [24, 26, 35, 39, 42, 46, 47, 49, 59, 60], "accordingli": [24, 25, 26], "yourself": [24, 25, 26, 39, 56, 57], "220": [24, 35, 36, 38], "229": [24, 33], "best_v": 24, "alid_epoch": 24, "subsampl": [24, 56, 57], "ing_factor": 24, "a34171ed85605b0926eebbd0463d059431f4f74a": 24, "dec": 24, "ver": 24, "ion": 24, "530e8a1": 24, "op": 24, "1220120619": [24, 25, 26], "7695ff496b": [24, 25, 26], "s9n4w": [24, 25, 26], "icefa": 24, "ll": 24, "transdu": 24, "cer": 24, "use_averaged_model": [24, 25, 26], "cnn_module_kernel": [24, 26], "left_context_length": 24, "chunk_length": 24, "right_context_length": 24, "memory_s": 24, "231": [24, 25, 26], "053": 24, "022": 24, "708": [24, 33, 35, 38, 49], "75490012": 24, "320": [24, 35], "682": 24, "lh": [24, 25, 26], "rw": [24, 25, 26], "289m": 24, "jan": [24, 25, 26], "289": 24, "roughli": [24, 25, 26], "equal": [24, 25, 26, 56, 57], "1024": [24, 25, 26, 55], "287": [24, 49], "1010k": [24, 25], "decoder_jit_trac": [24, 25, 26, 29, 55, 57], "283m": 24, "encoder_jit_trac": [24, 25, 26, 29, 55, 57], "0m": [24, 25], "joiner_jit_trac": [24, 25, 26, 29, 55, 57], "sure": [24, 25, 26], "found": [24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "param": [24, 25, 26], "503k": [24, 25], "437": [24, 25, 26], "142m": 24, "79k": 24, "5m": [24, 25], "architectur": [24, 25, 26, 55], "editor": 
[24, 25, 26], "content": [24, 25, 26], "283": [24, 26], "1010": [24, 25], "503": [24, 25], "convers": [24, 25, 26], "half": [24, 25, 26, 41, 56, 57], "v": [24, 25, 26, 38, 46, 47], "float16": [24, 25, 26], "occupi": [24, 25, 26], "twice": [24, 25, 26], "smaller": [24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "fp16": [24, 25, 26, 31, 41, 43, 44, 51, 55, 56, 57, 59, 60], "won": [24, 25, 26, 27, 33, 36, 38, 39, 41, 43, 44, 55, 56, 57], "accept": [24, 25, 26], "216": [24, 33, 38, 46, 47], "encoder_param_filenam": [24, 25, 26], "encoder_bin_filenam": [24, 25, 26], "decoder_param_filenam": [24, 25, 26], "decoder_bin_filenam": [24, 25, 26], "joiner_param_filenam": [24, 25, 26], "joiner_bin_filenam": [24, 25, 26], "sound_filenam": [24, 25, 26], "141": 24, "328": 24, "336": 24, "106000": [24, 25, 26, 38, 42], "581": [24, 42], "381": 24, "7767517": [24, 25, 26], "1060": 24, "1342": 24, "in0": [24, 25, 26], "explan": [24, 25, 26], "magic": [24, 25, 26], "intermedi": [24, 25, 26], "increment": [24, 25, 26], "1061": 24, "sherpametadata": [24, 25, 26], "sherpa_meta_data1": [24, 25, 26], "newli": [24, 25, 26], "must": [24, 25, 26, 56], "eas": [24, 25, 26], "pair": [24, 25, 26], "sad": [24, 25, 26], "rememb": [24, 25, 26], "anymor": [24, 25, 26], "flexibl": [24, 25, 26], "edit": [24, 25, 26], "arm": [24, 25, 26], "aarch64": [24, 25, 26], "onc": [24, 25], "mayb": [24, 25], "year": [24, 25], "_jit_trac": [24, 25], "fp32": [24, 25], "doubl": [24, 25], "j": [24, 25, 33, 38], "py38": [24, 25, 26], "arg": [24, 25], "wave_filenam": [24, 25], "16k": [24, 25], "hz": [24, 25, 46, 47], "mono": [24, 25], "calibr": [24, 25], "cat": [24, 25], "eof": [24, 25], "calcul": [24, 25, 43, 56, 57], "has_gpu": [24, 25], "config": [24, 25], "use_vulkan_comput": [24, 25], "88": [24, 35], "conv_87": 24, "942385": [24, 25], "threshold": [24, 25, 43], "938493": 24, "968131": 24, "conv_88": 24, "442448": 24, "549335": 24, "167552": 24, "conv_89": 24, "228289": 24, "001738": 24, "871552": 24, 
"linear_90": 24, "976146": 24, "101789": 24, "267128": 24, "linear_91": 24, "962030": 24, "162033": 24, "602713": 24, "linear_92": 24, "323041": 24, "853959": 24, "953129": 24, "linear_94": 24, "905416": 24, "648006": 24, "323545": 24, "linear_93": 24, "474093": 24, "200188": 24, "linear_95": 24, "888012": 24, "403563": 24, "483986": 24, "linear_96": 24, "856741": 24, "398679": 24, "524273": 24, "linear_97": 24, "635942": 24, "613655": 24, "590950": 24, "linear_98": 24, "460340": 24, "670146": 24, "398010": 24, "linear_99": 24, "532276": 24, "585537": 24, "119396": 24, "linear_101": 24, "585871": 24, "719224": 24, "205809": 24, "linear_100": 24, "751382": 24, "081648": 24, "linear_102": 24, "593344": 24, "450581": 24, "551147": 24, "linear_103": 24, "592681": 24, "705824": 24, "257959": 24, "linear_104": 24, "752957": 24, "980955": 24, "110489": 24, "linear_105": 24, "696240": 24, "877193": 24, "608953": 24, "linear_106": 24, "059659": 24, "643138": 24, "048950": 24, "linear_108": 24, "975461": 24, "589567": 24, "671457": 24, "linear_107": 24, "190381": 24, "515701": 24, "linear_109": 24, "710759": 24, "305635": 24, "082436": 24, "linear_110": 24, "531228": 24, "731162": 24, "159557": 24, "linear_111": 24, "528083": 24, "259322": 24, "211544": 24, "linear_112": 24, "148807": 24, "500842": 24, "087374": 24, "linear_113": 24, "592566": 24, "948851": 24, "166611": 24, "linear_115": 24, "437109": 24, "608947": 24, "642395": 24, "linear_114": 24, "193942": 24, "503904": 24, "linear_116": 24, "966980": 24, "200896": 24, "676392": 24, "linear_117": 24, "451303": 24, "061664": 24, "951344": 24, "linear_118": 24, "077262": 24, "965800": 24, "023804": 24, "linear_119": 24, "671615": 24, "847613": 24, "198460": 24, "linear_120": 24, "625638": 24, "131427": 24, "556595": 24, "linear_122": 24, "274080": 24, "888716": 24, "978189": 24, "linear_121": 24, "420480": 24, "429659": 24, "linear_123": 24, "826197": 24, "599617": 24, "281532": 24, "linear_124": 24, "396383": 24, 
"325849": 24, "335875": 24, "linear_125": 24, "337198": 24, "941410": 24, "221970": 24, "linear_126": 24, "699965": 24, "842878": 24, "224073": 24, "linear_127": 24, "775370": 24, "884215": 24, "696438": 24, "linear_129": 24, "872276": 24, "837319": 24, "254213": 24, "linear_128": 24, "180057": 24, "687883": 24, "linear_130": 24, "150427": 24, "454298": 24, "765789": 24, "linear_131": 24, "112692": 24, "924847": 24, "025545": 24, "linear_132": 24, "852893": 24, "116593": 24, "749626": 24, "linear_133": 24, "517084": 24, "024665": 24, "275314": 24, "linear_134": 24, "683807": 24, "878618": 24, "743618": 24, "linear_136": 24, "421055": 24, "322729": 24, "086264": 24, "linear_135": 24, "309880": 24, "917679": 24, "linear_137": 24, "827781": 24, "744595": 24, "915554": 24, "linear_138": 24, "422395": 24, "742882": 24, "402161": 24, "linear_139": 24, "527538": 24, "866123": 24, "849449": 24, "linear_140": 24, "128619": 24, "657793": 24, "266134": 24, "linear_141": 24, "839593": 24, "845993": 24, "021378": 24, "linear_143": 24, "442304": 24, "099039": 24, "889746": 24, "linear_142": 24, "325038": 24, "849592": 24, "linear_144": 24, "929444": 24, "618206": 24, "605080": 24, "linear_145": 24, "382126": 24, "321095": 24, "625010": 24, "linear_146": 24, "894987": 24, "867645": 24, "836517": 24, "linear_147": 24, "915313": 24, "906028": 24, "886522": 24, "linear_148": 24, "614287": 24, "908151": 24, "496181": 24, "linear_150": 24, "724932": 24, "485588": 24, "312899": 24, "linear_149": 24, "161146": 24, "606939": 24, "linear_151": 24, "164453": 24, "847355": 24, "719223": 24, "linear_152": 24, "086471": 24, "984121": 24, "222834": 24, "linear_153": 24, "099524": 24, "991601": 24, "816805": 24, "linear_154": 24, "054585": 24, "489706": 24, "286930": 24, "linear_155": 24, "389185": 24, "100321": 24, "963501": 24, "linear_157": 24, "982999": 24, "154796": 24, "637253": 24, "linear_156": 24, "537706": 24, "875190": 24, "linear_158": 24, "420287": 24, "502287": 24, "531588": 24, 
"linear_159": 24, "014746": 24, "423280": 24, "477261": 24, "linear_160": 24, "633553": 24, "715335": 24, "220921": 24, "linear_161": 24, "371849": 24, "117830": 24, "815203": 24, "linear_162": 24, "492933": 24, "126283": 24, "623318": 24, "linear_164": 24, "697504": 24, "825712": 24, "317358": 24, "linear_163": 24, "078367": 24, "008038": 24, "linear_165": 24, "023975": 24, "836278": 24, "577358": 24, "linear_166": 24, "860619": 24, "259792": 24, "493614": 24, "linear_167": 24, "380934": 24, "496160": 24, "107042": 24, "linear_168": 24, "691216": 24, "733317": 24, "831076": 24, "linear_169": 24, "723948": 24, "952728": 24, "129707": 24, "linear_171": 24, "034811": 24, "366547": 24, "665123": 24, "linear_170": 24, "356277": 24, "710501": 24, "linear_172": 24, "556884": 24, "729481": 24, "166058": 24, "linear_173": 24, "033039": 24, "207264": 24, "442120": 24, "linear_174": 24, "597379": 24, "658676": 24, "768131": 24, "linear_2": [24, 25], "293503": 24, "305265": 24, "877850": 24, "linear_1": [24, 25], "812222": 24, "766452": 24, "487047": 24, "linear_3": [24, 25], "999999": 24, "999755": 24, "031174": 24, "wish": [24, 25], "955k": 24, "18k": 24, "inparam": [24, 25], "inbin": [24, 25], "outparam": [24, 25], "outbin": [24, 25], "99m": 24, "78k": 24, "774k": [24, 25], "496": [24, 25, 38, 42], "replac": [24, 25], "774": [24, 25], "linear": [24, 25, 35], "convolut": [24, 25, 43, 53, 56], "exact": [24, 25], "4x": [24, 25], "comparison": 24, "468000": [25, 29, 55], "lstm_transducer_stateless2": [25, 29, 55], "862": 25, "222": [25, 36, 38], "865": 25, "is_pnnx": 25, "62e404dd3f3a811d73e424199b3408e309c06e1a": [25, 26], "6d7a559": [25, 26], "feb": [25, 26, 35], "147": [25, 26], "rnn_hidden_s": 25, "aux_layer_period": 25, "235": 25, "239": [25, 35], "472": 25, "595": 25, "324": 25, "83137520": 25, "596": 25, "325": 25, "257024": 25, "781812": 25, "327": 25, "84176356": 25, "182": [25, 26, 33, 42], "158": 25, "183": [25, 46, 47], "335": 25, "101": 25, "tracerwarn": [25, 26], 
"boolean": [25, 26], "caus": [25, 26, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "incorrect": [25, 26, 35], "flow": [25, 26], "constant": [25, 26], "futur": [25, 26, 35, 61], "need_pad": 25, "bool": 25, "259": [25, 33], "339": 25, "207": [25, 36, 38], "84": [25, 33], "324m": 25, "321": [25, 33], "107": [25, 42], "318m": 25, "159m": 25, "21k": 25, "159": [25, 38, 49], "37": [25, 33, 35, 38, 46], "861": 25, "266": [25, 26, 38, 42], "431": 25, "342": 25, "343": 25, "379": 25, "268": [25, 38, 42], "317m": 25, "317": 25, "conv_15": 25, "930708": 25, "972025": 25, "conv_16": 25, "978855": 25, "031788": 25, "456645": 25, "conv_17": 25, "868437": 25, "830528": 25, "218575": 25, "linear_18": 25, "107259": 25, "194808": 25, "293236": 25, "linear_19": 25, "193777": 25, "634748": 25, "401705": 25, "linear_20": 25, "259933": 25, "606617": 25, "722160": 25, "linear_21": 25, "186600": 25, "790260": 25, "512129": 25, "linear_22": 25, "759041": 25, "265832": 25, "050053": 25, "linear_23": 25, "931209": 25, "099090": 25, "979767": 25, "linear_24": 25, "324160": 25, "215561": 25, "321835": 25, "linear_25": 25, "800708": 25, "599352": 25, "284134": 25, "linear_26": 25, "492444": 25, "153369": 25, "274391": 25, "linear_27": 25, "660161": 25, "720994": 25, "674126": 25, "linear_28": 25, "415265": 25, "174434": 25, "007133": 25, "linear_29": 25, "038418": 25, "118534": 25, "724262": 25, "linear_30": 25, "072084": 25, "936867": 25, "259155": 25, "linear_31": 25, "342712": 25, "599489": 25, "282787": 25, "linear_32": 25, "340535": 25, "120308": 25, "701103": 25, "linear_33": 25, "846987": 25, "630030": 25, "985939": 25, "linear_34": 25, "686298": 25, "204571": 25, "607586": 25, "linear_35": 25, "904821": 25, "575518": 25, "756420": 25, "linear_36": 25, "806659": 25, "585589": 25, "118401": 25, "linear_37": 25, "402340": 25, "047157": 25, "162680": 25, "linear_38": 25, "174589": 25, "923361": 25, "030258": 25, "linear_39": 25, "178576": 25, "556058": 25, "807705": 25, "linear_40": 25, 
"901954": 25, "301267": 25, "956539": 25, "linear_41": 25, "839805": 25, "597429": 25, "716181": 25, "linear_42": 25, "178945": 25, "651595": 25, "895699": 25, "829245": 25, "627592": 25, "637907": 25, "746186": 25, "255032": 25, "167313": 25, "000000": 25, "999756": 25, "031013": 25, "345k": 25, "17k": 25, "218m": 25, "counterpart": 25, "bit": [25, 33, 35, 36, 38, 42, 49], "4532": 25, "feedforward": [26, 35, 41, 56], "384": [26, 38], "unmask": 26, "downsampl": [26, 34], "factor": [26, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "473": [26, 38], "246": [26, 35, 38, 46, 47], "477": 26, "warm_step": 26, "2000": [26, 36], "feedforward_dim": 26, "attention_dim": [26, 33, 35, 38], "encoder_unmasked_dim": 26, "zipformer_downsampling_factor": 26, "decode_chunk_len": 26, "257": [26, 35, 46, 47], "023": 26, "zipformer2": 26, "419": 26, "At": [26, 33, 38], "stack": 26, "downsampling_factor": 26, "037": 26, "655": 26, "346": 26, "68944004": 26, "347": 26, "260096": 26, "348": [26, 46], "716276": 26, "656": [26, 38], "349": 26, "69920376": 26, "351": 26, "353": 26, "174": [26, 38], "175": 26, "1344": 26, "assert": 26, "cached_len": 26, "num_lay": 26, "1348": 26, "cached_avg": 26, "1352": 26, "cached_kei": 26, "1356": 26, "cached_v": 26, "1360": 26, "cached_val2": 26, "1364": 26, "cached_conv1": 26, "1368": 26, "cached_conv2": 26, "1373": 26, "left_context_len": 26, "1884": 26, "x_size": 26, "2442": 26, "2449": 26, "2469": 26, "2473": 26, "2483": 26, "kv_len": 26, "k": [26, 41, 46, 47, 55, 56, 57], "2570": 26, "attn_output": 26, "bsz": 26, "num_head": 26, "seq_len": 26, "head_dim": 26, "2926": 26, "lorder": 26, "2652": 26, "2653": 26, "embed_dim": 26, "2666": 26, "1543": 26, "in_x_siz": 26, "1637": 26, "1643": 26, "in_channel": 26, "1571": 26, "1763": 26, "src1": 26, "src2": 26, "1779": 26, "dim1": 26, "1780": 26, "dim2": 26, "_trace": 26, "958": 26, "tracer": 26, "instead": [26, 35, 56], "tupl": 26, "namedtupl": 26, "absolut": 26, "know": [26, 39], "side": 26, "allow": [26, 
41, 56], "behavior": [26, 35], "_c": 26, "_create_method_from_trac": 26, "646": 26, "357": 26, "102": [26, 33], "embedding_out": 26, "686": 26, "361": [26, 38, 42], "735": 26, "69": 26, "269m": 26, "269": [26, 33, 46, 47], "725": [26, 42], "1022k": 26, "266m": 26, "8m": 26, "509k": 26, "133m": 26, "152k": 26, "4m": 26, "1022": 26, "133": 26, "509": 26, "360": 26, "365": 26, "280": [26, 38], "372": [26, 33], "state": [26, 33, 35, 36, 38, 41, 43, 44, 51, 55, 56, 57], "026": 26, "410": 26, "411": [26, 38], "2028": 26, "2547": 26, "2029": 26, "23316": 26, "23317": 26, "23318": 26, "23319": 26, "23320": 26, "amount": [26, 32, 34], "pad": [26, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "conv2dsubsampl": 26, "arrai": 26, "23300": 26, "element": 26, "repo_url": 27, "basenam": 27, "why": 28, "streaming_asr": [28, 29, 55, 56, 57], "conv_emform": 28, "offline_asr": [28, 41], "baz": 29, "subset": [31, 38, 41, 43, 44, 55, 56, 57], "instruct": 31, "full": [31, 38, 39, 41, 43, 44, 55, 56, 57], "intial": 31, "decode_gigaspeech": 31, "1000": [31, 38, 59, 60], "whole": [31, 38, 42, 46, 47, 56, 57], "previou": [31, 51], "stateless": [31, 34, 37, 41, 55, 56, 57], "due": [31, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "vocabulari": [31, 35], "experi": [31, 33, 35, 36, 38, 39, 41, 43, 44, 49, 55, 56, 57], "use_mux": 31, "do_finetun": 31, "world": [31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 51, 55, 56, 57, 59, 60], "exp_giga_finetun": 31, "_mux": 31, "0045": 31, "mux": 31, "13024": 31, "ckpt": 31, "forget": 31, "quickli": 31, "certain": [31, 32], "mix": 31, "maintain": 31, "ones": 31, "lower": [31, 55], "public": 32, "capabl": 32, "high": [32, 34], "label": 32, "1best": [33, 36, 38, 42, 43, 44, 46, 47], "automag": [33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "stop": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "By": [33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "musan": [33, 36, 38, 39, 41, 43, 44, 55, 56, 57], "apt": [33, 36], "permiss": [33, 36], 
"commandlin": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "multi": [33, 35, 36, 38, 39, 41, 43, 44, 53, 55, 56, 57], "machin": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "ddp": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "implement": [33, 35, 36, 38, 39, 41, 43, 44, 53, 55, 56, 57], "utter": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "oom": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "decai": [33, 36, 38, 43, 44, 55], "warmup": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "function": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "get_param": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "directli": [33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "perturb": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "3x150": [33, 35, 36], "450": [33, 35, 36], "visual": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "logdir": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "labelsmooth": 33, "tensorflow": [33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "press": [33, 35, 36, 38, 41, 43, 44, 49, 55, 56, 57], "ctrl": [33, 35, 36, 38, 41, 43, 44, 49, 55, 56, 57], "engw8ksktzqs24zbv5dgcg": 33, "2021": [33, 36, 38, 42, 46, 47, 49], "22t11": 33, "scan": [33, 35, 36, 38, 41, 49, 55, 56], "116068": 33, "scalar": [33, 35, 36, 38, 41, 49, 55, 56], "listen": [33, 35, 36, 41, 49, 55, 56], "xxxx": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "saw": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "consol": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "avoid": [33, 35, 38], "nbest": [33, 38, 44], "lattic": [33, 36, 38, 41, 42, 46, 47, 56, 57], "uniqu": [33, 38, 41, 56, 57], "pkufool": [33, 36, 42], "icefall_asr_aishell_conformer_ctc": 33, "transcrib": [33, 35, 36, 38], "lang_char": [33, 35], "bac009s0764w0121": [33, 35, 36], "bac009s0764w0122": [33, 35, 36], "bac009s0764w0123": [33, 35, 36], "tran": [33, 36, 38, 42, 46, 47], "graph": [33, 36, 38, 41, 42, 46, 47, 56, 57], "conveni": [33, 36, 38, 39], "eo": [33, 36, 38], "soxi": [33, 35, 36, 
38, 42, 49], "sampl": [33, 35, 36, 38, 42, 43, 49, 56, 57], "precis": [33, 35, 36, 38, 41, 42, 49, 56, 57], "67263": [33, 35, 36], "cdda": [33, 35, 36, 38, 42, 49], "sector": [33, 35, 36, 38, 42, 49], "135k": [33, 35, 36], "256k": [33, 35, 36, 38], "sign": [33, 35, 36, 38, 49], "integ": [33, 35, 36, 38, 49], "pcm": [33, 35, 36, 38, 49], "65840": [33, 35, 36], "308": [33, 35, 36], "625": [33, 35, 36], "132k": [33, 35, 36], "64000": [33, 35, 36], "300": [33, 35, 36, 38, 39, 41, 51, 56], "128k": [33, 35, 36, 49], "displai": [33, 35, 36, 38], "topologi": [33, 38], "num_decoder_lay": [33, 38], "vgg_frontend": [33, 35, 38], "use_feat_batchnorm": [33, 38], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 33, "sun": 33, "sep": 33, "33cfe45": 33, "d57a873": 33, "nov": [33, 38], "hw": 33, "kangwei": 33, "icefall_aishell3": 33, "k2_releas": 33, "tokens_fil": 33, "num_path": [33, 38, 41, 56, 57], "ngram_lm_scal": [33, 38], "attention_decoder_scal": [33, 38], "nbest_scal": [33, 38], "sos_id": [33, 38], "eos_id": [33, 38], "4336": [33, 35], "131": [33, 38], "293": [33, 38], "369": [33, 38], "\u751a": [33, 35], "\u81f3": [33, 35], "\u51fa": [33, 35], "\u73b0": [33, 35], "\u4ea4": [33, 35], "\u6613": [33, 35], "\u51e0": [33, 35], "\u4e4e": [33, 35], "\u505c": [33, 35], "\u6b62": 33, "\u7684": [33, 35, 36], "\u60c5": [33, 35], "\u51b5": [33, 35], "\u4e00": [33, 35], "\u4e8c": [33, 35], "\u7ebf": [33, 35, 36], "\u57ce": [33, 35], "\u5e02": [33, 35], "\u867d": [33, 35], "\u7136": [33, 35], "\u4e5f": [33, 35, 36], "\u5904": [33, 35], "\u4e8e": [33, 35], "\u8c03": [33, 35], "\u6574": [33, 35], "\u4e2d": [33, 35, 36], "\u4f46": [33, 35, 36], "\u56e0": [33, 35], "\u4e3a": [33, 35], "\u805a": [33, 35], "\u96c6": [33, 35], "\u4e86": [33, 35, 36], "\u8fc7": [33, 35], "\u591a": [33, 35], "\u516c": [33, 35], "\u5171": [33, 35], "\u8d44": [33, 35], "\u6e90": [33, 35], "371": 33, "683": 33, "684": [33, 49], "651": [33, 49], "654": 33, "659": 33, "752": 33, "887": 33, "340": 33, "370": 33, 
"\u751a\u81f3": [33, 36], "\u51fa\u73b0": [33, 36], "\u4ea4\u6613": [33, 36], "\u51e0\u4e4e": [33, 36], "\u505c\u6b62": 33, "\u60c5\u51b5": [33, 36], "\u4e00\u4e8c": [33, 36], "\u57ce\u5e02": [33, 36], "\u867d\u7136": [33, 36], "\u5904\u4e8e": [33, 36], "\u8c03\u6574": [33, 36], "\u56e0\u4e3a": [33, 36], "\u805a\u96c6": [33, 36], "\u8fc7\u591a": [33, 36], "\u516c\u5171": [33, 36], "\u8d44\u6e90": [33, 36], "recor": [33, 38], "highest": [33, 38], "966": 33, "821": 33, "822": 33, "826": 33, "916": 33, "345": 33, "889": 33, "limit": [33, 35, 38, 53, 56], "upgrad": [33, 38], "NOT": [33, 35, 38, 49], "checkout": [33, 38], "hlg_decod": [33, 38], "four": [33, 38], "messag": [33, 38, 41, 43, 44, 55, 56, 57], "use_gpu": [33, 38], "word_tabl": [33, 38], "forward": [33, 38, 43], "cu": [33, 38], "int": [33, 38], "char": [33, 38], "98": 33, "150": [33, 38], "693": [33, 46], "165": [33, 38], "nnet_output": [33, 38], "185": [33, 38, 49], "217": [33, 38], "mandarin": 34, "beij": 34, "shell": 34, "technologi": 34, "ltd": 34, "peopl": 34, "accent": 34, "area": 34, "china": 34, "invit": 34, "particip": 34, "conduct": 34, "indoor": 34, "fidel": 34, "microphon": 34, "16khz": 34, "manual": 34, "through": 34, "profession": 34, "annot": 34, "inspect": 34, "free": [34, 39, 51, 55], "academ": 34, "moder": 34, "research": 34, "openslr": [34, 51], "ctc": [34, 37, 40, 44, 45, 48], "conv1d": [35, 41, 55, 56, 57], "tanh": 35, "borrow": 35, "ieeexplor": 35, "ieee": 35, "stamp": 35, "jsp": 35, "arnumb": 35, "9054419": 35, "predict": [35, 39, 41, 55, 56, 57], "charact": 35, "unit": 35, "87939824": 35, "optimized_transduc": 35, "technqiu": 35, "end": [35, 41, 43, 44, 49, 55, 56, 57, 59, 60], "maximum": 35, "emit": 35, "simplifi": [35, 53], "significantli": 35, "degrad": 35, "exactli": 35, "unprun": 35, "advantag": 35, "minim": 35, "pruned_transducer_stateless": [35, 41, 53, 56], "altern": 35, "though": 35, "transducer_stateless_modifi": 35, "pr": 35, "ram": 35, "tri": 35, "prob": [35, 55], "219": 
[35, 38], "lagz6hrcqxoigbfd5e0y3q": 35, "03t14": 35, "8477": 35, "250": [35, 42], "sym": [35, 41, 56, 57], "beam_search": [35, 41, 56, 57], "decoding_method": 35, "beam_4": 35, "ensur": 35, "poor": 35, "531": [35, 36], "994": [35, 38], "027": 35, "encoder_out_dim": 35, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 35, "50d2281": 35, "mar": 35, "0815224919": 35, "75d558775b": 35, "mmnv8": 35, "72": [35, 38], "878": [35, 47], "880": 35, "891": 35, "113": [35, 38], "userwarn": 35, "__floordiv__": 35, "round": 35, "toward": 35, "trunc": 35, "floor": 35, "keep": [35, 41, 56, 57], "div": 35, "b": [35, 38, 46, 47], "rounding_mod": 35, "divis": 35, "x_len": 35, "163": [35, 38], "\u6ede": 35, "322": 35, "759": 35, "760": 35, "919": 35, "922": 35, "929": 35, "046": 35, "319": [35, 38], "798": 35, "831": [35, 47], "215": [35, 38, 42], "402": 35, "topk_hyp_index": 35, "topk_index": 35, "logit": 35, "583": [35, 47], "lji9mwuorlow3jkdhxwk8a": 36, "13t11": 36, "4454": 36, "icefall_asr_aishell_tdnn_lstm_ctc": 36, "858": [36, 38], "389": [36, 38], "161": [36, 38], "536": 36, "539": 36, "917": 36, "\u505c\u6ede": 36, "mmi": [37, 40], "blank": [37, 40], "skip": [37, 39, 40, 41, 55, 56, 57], "distil": [37, 40], "hubert": [37, 40], "ligru": [37, 45], "libri": [38, 39, 41, 43, 44, 55, 56, 57], "3x960": [38, 41, 43, 44, 55, 56, 57], "2880": [38, 41, 43, 44, 55, 56, 57], "lzgnetjwrxc3yghnmd4kpw": 38, "24t16": 38, "4540": 38, "sentenc": [38, 51], "piec": 38, "And": [38, 41, 43, 44, 55, 56, 57], "neither": 38, "nor": 38, "5000": 38, "033": 38, "537": 38, "full_libri": [38, 39], "464": 38, "548": 38, "776": 38, "652": [38, 49], "109226120": 38, "714": [38, 46], "206": 38, "944": 38, "1328": 38, "443": [38, 42], "2563": 38, "494": 38, "592": 38, "1715": 38, "52576": 38, "128": 38, "1424": 38, "807": 38, "506": 38, "808": [38, 46], "362": 38, "1477": 38, "2922": 38, "4295": 38, "52343": 38, "396": 38, "3584": 38, "432": 38, "680": [38, 46], "_pickl": 38, "unpicklingerror": 38, "invalid": 38, 
"hlg_modifi": 38, "g_4_gram": [38, 42, 46, 47], "sentencepiec": 38, "875": [38, 42], "212k": 38, "267440": [38, 42], "1253": [38, 42], "535k": 38, "77200": [38, 42], "154k": 38, "554": 38, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 38, "8d93169": 38, "601": 38, "025": 38, "broffel": 38, "osom": 38, "723": 38, "775": 38, "881": 38, "571": 38, "857": 38, "979": 38, "055": 38, "117": 38, "051": 38, "363": 38, "959": [38, 47], "546": 38, "598": 38, "599": [38, 42], "833": 38, "834": 38, "915": 38, "076": 38, "110": 38, "397": 38, "999": [38, 41, 56, 57], "concaten": 38, "bucket": 38, "sampler": 38, "ctc_decod": 38, "ngram_lm_rescor": 38, "attention_rescor": 38, "105": 38, "125": [38, 49], "228": 38, "543": 38, "topo": 38, "547": 38, "729": 38, "703": 38, "545": 38, "122": 38, "126": 38, "135": [38, 49], "153": [38, 49], "945": 38, "475": 38, "191": [38, 46, 47], "398": 38, "515": 38, "deseri": 38, "441": 38, "fsaclass": 38, "loadfsa": 38, "const": 38, "string": 38, "c10": 38, "ignor": 38, "589": 38, "attention_scal": 38, "162": 38, "169": [38, 46, 47], "188": 38, "984": 38, "624": 38, "519": [38, 47], "632": 38, "645": [38, 49], "243": 38, "303": 38, "179": 38, "knowledg": 39, "vector": 39, "mvq": 39, "kd": 39, "pruned_transducer_stateless4": [39, 41, 53, 56], "theoret": 39, "applic": 39, "minor": 39, "stop_stag": [39, 59, 60], "thing": 39, "distillation_with_hubert": 39, "Of": 39, "cours": 39, "xl": 39, "proce": 39, "960h": [39, 43], "use_extracted_codebook": 39, "augment": 39, "th": [39, 46, 47], "embedding_lay": 39, "num_codebook": 39, "under": [39, 51], "vq_fbank_layer36_cb8": 39, "whola": 39, "snippet": 39, "echo": 39, "awk": 39, "split": 39, "pruned_transducer_stateless6": 39, "12359": 39, "spec": 39, "warp": 39, "paid": 39, "suitabl": [41, 55, 56, 57], "pruned_transducer_stateless2": [41, 53, 56], "pruned_transducer_stateless5": [41, 53, 56], "scroll": [41, 43, 44, 55, 56, 57], "arxiv": [41, 55, 56, 57], "2206": [41, 55, 56, 57], "13236": [41, 55, 56, 57], 
"rework": [41, 53, 56], "daniel": [41, 56, 57], "joint": [41, 55, 56, 57], "contrari": [41, 55, 56, 57], "convent": [41, 55, 56, 57], "recurr": [41, 55, 56, 57], "2x": [41, 56, 57], "littl": [41, 56], "436000": [41, 43, 44, 55, 56, 57], "438000": [41, 43, 44, 55, 56, 57], "qogspbgsr8kzcrmmie9jgw": 41, "20t15": [41, 55, 56], "4468": [41, 55, 56], "210171": [41, 55, 56], "access": [41, 43, 44, 55, 56, 57], "googl": [41, 43, 44, 55, 56, 57], "6008": [41, 43, 44, 55, 56, 57], "localhost": [41, 43, 44, 55, 56, 57], "expos": [41, 43, 44, 55, 56, 57], "proxi": [41, 43, 44, 55, 56, 57], "bind_al": [41, 43, 44, 55, 56, 57], "fast_beam_search": [41, 43, 55, 56, 57], "474000": [41, 55, 56, 57], "largest": [41, 56, 57], "posterior": [41, 43, 56, 57], "algorithm": [41, 56, 57], "pdf": [41, 44, 56, 57], "1211": [41, 56, 57], "3711": [41, 56, 57], "espnet": [41, 56, 57], "net": [41, 56, 57], "beam_search_transduc": [41, 56, 57], "basic": [41, 56], "topk": [41, 56, 57], "expand": [41, 56, 57], "mode": [41, 56, 57], "being": [41, 56, 57], "hardcod": [41, 56, 57], "composit": [41, 56, 57], "log_prob": [41, 56, 57], "hard": [41, 53, 56, 57], "2211": [41, 56, 57], "00484": [41, 56, 57], "fast_beam_search_lg": [41, 56, 57], "trivial": [41, 56, 57], "fast_beam_search_nbest": [41, 56, 57], "random_path": [41, 56, 57], "shortest": [41, 56, 57], "fast_beam_search_nbest_lg": [41, 56, 57], "logic": [41, 56, 57], "smallest": [41, 55, 56, 57], "normal": [42, 46, 47, 49, 56], "icefall_asr_librispeech_tdnn": 42, "lstm_ctc": 42, "flac": 42, "116k": 42, "140k": 42, "343k": 42, "164k": 42, "105k": 42, "174k": 42, "pretraind": 42, "584": [42, 47], "791": 42, "245": 42, "098": 42, "099": 42, "methond": [42, 46, 47], "631": 42, "010": 42, "guidanc": 43, "bigger": 43, "simpli": 43, "discard": 43, "prevent": 43, "lconv": 43, "encourag": [43, 44, 55], "stabil": [43, 44], "doesn": 43, "warm": [43, 44], "xyozukpeqm62hbilud4upa": [43, 44], "ctc_guide_decode_b": 43, "pretrained_ctc": 43, 
"jit_pretrained_ctc": 43, "100h": 43, "yfyeung": 43, "wechat": 44, "zipformer_mmi": 44, "worker": [44, 55], "hp": 44, "tdnn_ligru_ctc": 46, "enough": [46, 47, 49, 51], "luomingshuang": [46, 47], "icefall_asr_timit_tdnn_ligru_ctc": 46, "pretrained_average_9_25": 46, "fdhc0_si1559": [46, 47], "felc0_si756": [46, 47], "fmgd0_si1564": [46, 47], "ffprobe": [46, 47], "show_format": [46, 47], "nistspher": [46, 47], "database_id": [46, 47], "database_vers": [46, 47], "utterance_id": [46, 47], "dhc0_si1559": [46, 47], "sample_min": [46, 47], "4176": [46, 47], "sample_max": [46, 47], "5984": [46, 47], "bitrat": [46, 47], "pcm_s16le": [46, 47], "s16": [46, 47], "elc0_si756": [46, 47], "1546": [46, 47], "1989": [46, 47], "mgd0_si1564": [46, 47], "7626": [46, 47], "10573": [46, 47], "660": 46, "695": 46, "697": 46, "819": 46, "829": 46, "sil": [46, 47], "dh": [46, 47], "ih": [46, 47], "uw": [46, 47], "ah": [46, 47], "ii": [46, 47], "z": [46, 47], "aa": [46, 47], "ei": [46, 47], "dx": [46, 47], "d": [46, 47, 51], "uh": [46, 47], "ng": [46, 47], "eh": [46, 47], "jh": [46, 47], "er": [46, 47], "ai": [46, 47], "hh": [46, 47], "aw": 46, "ae": [46, 47], "705": 46, "715": 46, "720": 46, "251": [46, 47], "ch": 46, "icefall_asr_timit_tdnn_lstm_ctc": 47, "pretrained_average_16_25": 47, "816": 47, "827": 47, "unk": 47, "739": 47, "977": 47, "978": 47, "981": 47, "ow": 47, "ykubhb5wrmosxykid1z9eg": 49, "23t23": 49, "icefall_asr_yesno_tdnn": 49, "0_0_1_0_0_1_1_1": 49, "0_0_1_0_1_0_0_1": 49, "0_0_1_1_0_0_0_1": 49, "0_0_1_1_0_1_1_0": 49, "0_0_1_1_1_0_0_0": 49, "0_0_1_1_1_1_0_0": 49, "0_1_0_0_0_1_0_0": 49, "0_1_0_0_1_0_1_0": 49, "0_1_0_1_0_0_0_0": 49, "0_1_0_1_1_1_0_0": 49, "0_1_1_0_0_1_1_1": 49, "0_1_1_1_0_0_1_0": 49, "0_1_1_1_1_0_1_0": 49, "1_0_0_0_0_0_0_0": 49, "1_0_0_0_0_0_1_1": 49, "1_0_0_1_0_1_1_1": 49, "1_0_1_1_0_1_1_1": 49, "1_0_1_1_1_1_0_1": 49, "1_1_0_0_0_1_1_1": 49, "1_1_0_0_1_0_1_1": 49, "1_1_0_1_0_1_0_0": 49, "1_1_0_1_1_0_0_1": 49, "1_1_0_1_1_1_1_0": 49, "1_1_1_0_0_1_0_1": 49, 
"1_1_1_0_1_0_1_0": 49, "1_1_1_1_0_0_1_0": 49, "1_1_1_1_1_0_0_0": 49, "1_1_1_1_1_1_1_1": 49, "54080": 49, "507": 49, "108k": 49, "119": 49, "650": 49, "139": 49, "143": 49, "198": 49, "181": 49, "186": 49, "187": 49, "correctli": 49, "simplest": 49, "nnlm": 51, "complet": 51, "wget": 51, "resourc": 51, "norm": 51, "gzip": 51, "prepare_lm_training_data": 51, "lm_data": 51, "grab": 51, "cup": 51, "coffe": 51, "sort_lm_training_data": 51, "sorted_lm_data": 51, "statist": 51, "lm_data_stat": 51, "aforement": 51, "repeat": 51, "rnn_lm": 51, "tie": 51, "hyper": [51, 59, 60], "coupl": [51, 59, 60], "dai": [51, 59, 60], "former": 53, "mask": [53, 56, 57], "wenet": 53, "did": 53, "request": 53, "complic": 53, "techniqu": 53, "bank": 53, "memor": 53, "histori": 53, "introduc": 53, "variant": 53, "pruned_stateless_emformer_rnnt2": 53, "conv_emformer_transducer_stateless": 53, "ourself": 53, "mechan": 53, "onlin": 55, "lstm_transducer_stateless": 55, "prepare_giga_speech": 55, "cj2vtpiwqhkn9q1tx6ptpg": 55, "dynam": [56, 57], "causal": 56, "short": [56, 57], "2012": 56, "05481": 56, "flag": 56, "indic": [56, 57], "whether": 56, "sequenc": [56, 57], "uniformli": [56, 57], "seen": [56, 57], "97vkxf80ru61cnp2alwzzg": 56, "streaming_decod": [56, 57], "wise": [56, 57], "parallel": [56, 57], "bath": [56, 57], "parallelli": [56, 57], "seem": 56, "benefit": 56, "320m": 57, "550": 57, "basicli": 57, "scriptmodul": 57, "jit_trace_export": 57, "jit_trace_pretrain": 57, "monoton": 58, "align": 58, "condit": [59, 60], "variat": [59, 60], "autoencod": [59, 60], "adversari": [59, 60], "monotonic_align": [59, 60], "build_ext": [59, 60], "inplac": [59, 60], "ground": [59, 60], "truth": [59, 60], "test_onnx": [59, 60], "2024": 59, "350": 60, "zrjin": 60, "synthesi": 61, "task": 61}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": [0, 9], "style": 0, "contribut": [1, 3], "document": 1, "how": [2, 22, 28, 29], "creat": [2, 13, 21], "recip": [2, 61], "data": [2, 9, 
11, 21, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "prepar": [2, 9, 11, 21, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "train": [2, 9, 16, 18, 21, 24, 25, 26, 27, 31, 32, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "decod": [2, 5, 6, 7, 9, 12, 21, 22, 27, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "pre": [2, 18, 24, 25, 26, 27, 31, 32, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "model": [2, 5, 15, 18, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "lodr": [4, 6], "rnn": [4, 50, 51], "transduc": [4, 6, 7, 24, 25, 26, 35, 41, 55, 56, 57], "wer": [4, 6, 7, 38], "differ": [4, 6, 7], "beam": [4, 6, 7, 35], "size": [4, 6, 7], "languag": [5, 51], "lm": [6, 38, 50], "rescor": [6, 33, 38], "base": 6, "method": 6, "v": 6, "shallow": [6, 7], "fusion": [6, 7], "The": [6, 35], "number": 6, "each": 6, "field": 6, "i": 6, "test": [6, 7, 21, 24, 25, 26], "clean": [6, 7], "other": 6, "time": [6, 7], "docker": [8, 9], "introduct": [9, 53], "view": 9, "avail": 9, "tag": 9, "cuda": [9, 21], "enabl": 9, "imag": 9, "cpu": 9, "onli": 9, "download": [9, 11, 21, 24, 25, 26, 27, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "run": [9, 22], "gpu": 9, "yesno": [9, 48], "within": 9, "contain": 9, "updat": 9, "frequent": 10, "ask": 10, "question": 10, "faq": 10, "oserror": 10, "libtorch_hip": 10, "so": 10, "cannot": 10, "open": 10, "share": 10, "object": 10, "file": [10, 11, 27], "directori": 10, "attributeerror": 10, "modul": 10, "distutil": 10, "ha": 10, "attribut": 10, "version": 10, "importerror": 10, "libpython3": 10, "10": 10, "1": [10, 21, 24, 25, 26, 33, 35, 36, 38], "0": [10, 21], "No": 10, "For": [11, 12, 13, 15, 16], "more": [11, 12, 13, 15, 16], "curiou": [11, 12, 13, 15, 16], "A": 11, "quick": 11, "look": 11, "gener": 11, "environ": [13, 21], "setup": 13, "virtual": [13, 21], 
"instal": [13, 21, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47], "depend": 13, "icefal": [13, 14, 20, 21, 24, 25, 26], "dummi": 14, "tutori": 14, "export": [15, 22, 23, 24, 25, 26, 27, 28, 29, 30, 41, 43, 44, 55, 56, 57, 59, 60], "paramet": 15, "via": [15, 24, 25, 26], "state_dict": [15, 22, 41, 43, 44, 55, 56, 57], "torch": [15, 21, 24, 25, 26, 28, 29, 41, 43, 44, 55, 56, 57], "jit": [15, 24, 25, 26, 28, 29, 41, 43, 44, 55, 56, 57], "script": [15, 28, 41, 43, 44, 56, 57], "onnx": [15, 27], "huggingfac": [17, 19], "space": 19, "youtub": [19, 21], "video": [19, 21], "content": [20, 32, 61], "toolkit": 21, "cudnn": 21, "torchaudio": 21, "2": [21, 24, 25, 26, 33, 35, 36, 38], "k2": 21, "3": [21, 24, 25, 26, 33, 35, 38], "lhots": 21, "4": [21, 24, 25, 26], "exampl": [21, 27, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "5": [21, 24, 25, 26], "6": [21, 24, 25, 26], "your": 21, "when": [22, 28, 29], "us": [22, 28, 29, 41, 43, 44, 55, 56, 57], "py": 22, "ncnn": [23, 24, 25, 26], "convemform": 24, "pnnx": [24, 25, 26], "trace": [24, 25, 26, 29, 55, 57], "torchscript": [24, 25, 26], "modifi": [24, 25, 26, 35], "encod": [24, 25, 26], "sherpa": [24, 25, 26, 27, 41, 56, 57], "7": [24, 25], "option": [24, 25, 33, 36, 38, 41, 43, 44, 55, 56, 57], "int8": [24, 25], "quantiz": [24, 25], "lstm": [25, 36, 42, 47, 55], "stream": [26, 37, 52, 53, 56, 57], "zipform": [26, 31, 43, 44, 57], "sound": 27, "finetun": 31, "from": 31, "supervis": 31, "fine": [31, 32], "tune": [31, 32], "tabl": [32, 61], "conform": [33, 38, 53], "ctc": [33, 36, 38, 42, 43, 46, 47, 49], "configur": [33, 36, 38, 41, 43, 44, 55, 56, 57], "log": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "usag": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "case": [33, 35, 36, 38], "kaldifeat": [33, 35, 36, 38, 42, 46, 47, 49], "hlg": [33, 36, 38], "attent": [33, 38], "colab": [33, 35, 36, 38, 42, 46, 47, 49], "notebook": [33, 35, 36, 38, 42, 46, 47, 49], "deploy": [33, 38], "c": [33, 38], "aishel": 34, "stateless": 35, "loss": 35, "todo": 35, 
"greedi": 35, "search": [35, 59, 60], "tdnn": [36, 42, 46, 47, 49], "non": 37, "asr": [37, 52], "comput": 38, "n": 38, "gram": 38, "distil": 39, "hubert": 39, "codebook": 39, "index": 39, "librispeech": [40, 54], "prune": [41, 56], "statelessx": [41, 56], "pretrain": [41, 43, 44, 55, 56, 57, 59, 60], "deploi": [41, 56, 57], "infer": [42, 46, 47, 49, 59, 60], "blank": 43, "skip": 43, "mmi": 44, "timit": 45, "ligru": 46, "an": 51, "emform": 53, "which": 55, "simul": [56, 57], "real": [56, 57], "tt": 58, "vit": [59, 60], "ljspeech": 59, "build": [59, 60], "monoton": [59, 60], "align": [59, 60], "vctk": 60}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 58}, "alltitles": {"Follow the code style": [[0, "follow-the-code-style"]], "Contributing to Documentation": [[1, "contributing-to-documentation"]], "How to create a recipe": [[2, "how-to-create-a-recipe"]], "Data Preparation": [[2, "data-preparation"], [11, "data-preparation"], [35, "data-preparation"]], "Training": [[2, "training"], [9, "training"], [16, "training"], [21, "training"], [33, "training"], [35, "training"], [36, "training"], [38, "training"], [39, "training"], [41, "training"], [42, "training"], [43, "training"], [44, "training"], [46, "training"], [47, "training"], [49, "training"], [55, "training"], [56, "training"], [57, "training"], [59, "training"], [60, "training"]], "Decoding": [[2, "decoding"], [9, "decoding"], [12, "decoding"], [21, "decoding"], [33, "decoding"], [35, "decoding"], [36, "decoding"], [38, "decoding"], [39, "decoding"], [41, "decoding"], [42, "decoding"], [43, "decoding"], [44, "decoding"], [46, "decoding"], [47, "decoding"], [49, "decoding"], [55, "decoding"], [56, "decoding"], [57, "decoding"]], "Pre-trained 
model": [[2, "pre-trained-model"]], "Contributing": [[3, "contributing"]], "LODR for RNN Transducer": [[4, "lodr-for-rnn-transducer"]], "WER of LODR with different beam sizes": [[4, "id1"]], "Decoding with language models": [[5, "decoding-with-language-models"]], "LM rescoring for Transducer": [[6, "lm-rescoring-for-transducer"]], "WERs of LM rescoring with different beam sizes": [[6, "id1"]], "WERs of LM rescoring + LODR with different beam sizes": [[6, "id2"]], "LM-rescoring-based methods vs shallow-fusion-based methods (The numbers in each field is WER on test-clean, WER on test-other and decoding time on test-clean)": [[6, "id3"]], "Shallow fusion for Transducer": [[7, "shallow-fusion-for-transducer"]], "WERs and decoding time (on test-clean) of shallow fusion with different beam sizes": [[7, "id2"]], "Docker": [[8, "docker"]], "Introduction": [[9, "introduction"], [53, "introduction"]], "View available tags": [[9, "view-available-tags"]], "CUDA-enabled docker images": [[9, "cuda-enabled-docker-images"]], "CPU-only docker images": [[9, "cpu-only-docker-images"]], "Download a docker image (CUDA)": [[9, "download-a-docker-image-cuda"]], "Download a docker image (CPU)": [[9, "download-a-docker-image-cpu"]], "Run a docker image with GPU": [[9, "run-a-docker-image-with-gpu"]], "Run a docker image with CPU": [[9, "run-a-docker-image-with-cpu"]], "Run yesno within a docker container": [[9, "run-yesno-within-a-docker-container"]], "Update the code": [[9, "update-the-code"]], "Data preparation": [[9, "data-preparation"], [21, "data-preparation"], [31, "data-preparation"], [33, "data-preparation"], [36, "data-preparation"], [38, "data-preparation"], [39, "data-preparation"], [41, "data-preparation"], [42, "data-preparation"], [43, "data-preparation"], [44, "data-preparation"], [46, "data-preparation"], [47, "data-preparation"], [49, "data-preparation"], [55, "data-preparation"], [56, "data-preparation"], [57, "data-preparation"], [59, "data-preparation"], [60, 
"data-preparation"]], "Frequently Asked Questions (FAQs)": [[10, "frequently-asked-questions-faqs"]], "OSError: libtorch_hip.so: cannot open shared object file: no such file or directory": [[10, "oserror-libtorch-hip-so-cannot-open-shared-object-file-no-such-file-or-directory"]], "AttributeError: module \u2018distutils\u2019 has no attribute \u2018version\u2019": [[10, "attributeerror-module-distutils-has-no-attribute-version"]], "ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory": [[10, "importerror-libpython3-10-so-1-0-cannot-open-shared-object-file-no-such-file-or-directory"]], "For the more curious": [[11, "for-the-more-curious"], [12, "for-the-more-curious"], [13, "for-the-more-curious"], [15, "for-the-more-curious"], [16, "for-the-more-curious"]], "A quick look to the generated files": [[11, "a-quick-look-to-the-generated-files"]], "download": [[11, "download"]], "data": [[11, "data"]], "Environment setup": [[13, "environment-setup"]], "Create a virtual environment": [[13, "create-a-virtual-environment"]], "Install dependencies": [[13, "install-dependencies"]], "Install icefall": [[13, "install-icefall"]], "Icefall for dummies tutorial": [[14, "icefall-for-dummies-tutorial"]], "Model Export": [[15, "model-export"]], "Export the model parameters via model.state_dict()": [[15, "export-the-model-parameters-via-model-state-dict"]], "Export via torch.jit.script()": [[15, "export-via-torch-jit-script"]], "Export via torch.onnx.export()": [[15, "export-via-torch-onnx-export"]], "Huggingface": [[17, "huggingface"]], "Pre-trained models": [[18, "pre-trained-models"]], "Huggingface spaces": [[19, "huggingface-spaces"]], "YouTube Video": [[19, "youtube-video"], [21, "youtube-video"]], "Icefall": [[20, "icefall"]], "Contents:": [[20, null]], "Installation": [[21, "installation"]], "(0) Install CUDA toolkit and cuDNN": [[21, "install-cuda-toolkit-and-cudnn"]], "(1) Install torch and torchaudio": [[21, 
"install-torch-and-torchaudio"]], "(2) Install k2": [[21, "install-k2"]], "(3) Install lhotse": [[21, "install-lhotse"]], "(4) Download icefall": [[21, "download-icefall"]], "Installation example": [[21, "installation-example"]], "(1) Create a virtual environment": [[21, "create-a-virtual-environment"]], "(2) Install CUDA toolkit and cuDNN": [[21, "id1"]], "(3) Install torch and torchaudio": [[21, "id2"]], "(4) Install k2": [[21, "id3"]], "(5) Install lhotse": [[21, "id5"]], "(6) Download icefall": [[21, "id6"]], "Test Your Installation": [[21, "test-your-installation"]], "Export model.state_dict()": [[22, "export-model-state-dict"], [41, "export-model-state-dict"], [43, "export-model-state-dict"], [44, "export-model-state-dict"], [55, "export-model-state-dict"], [56, "export-model-state-dict"], [57, "export-model-state-dict"]], "When to use it": [[22, "when-to-use-it"], [28, "when-to-use-it"], [29, "when-to-use-it"]], "How to export": [[22, "how-to-export"], [28, "how-to-export"], [29, "how-to-export"]], "How to use the exported model": [[22, "how-to-use-the-exported-model"], [28, "how-to-use-the-exported-model"]], "Use the exported model to run decode.py": [[22, "use-the-exported-model-to-run-decode-py"]], "Export to ncnn": [[23, "export-to-ncnn"]], "Export ConvEmformer transducer models to ncnn": [[24, "export-convemformer-transducer-models-to-ncnn"]], "1. Download the pre-trained model": [[24, "download-the-pre-trained-model"], [25, "download-the-pre-trained-model"], [26, "download-the-pre-trained-model"]], "2. Install ncnn and pnnx": [[24, "install-ncnn-and-pnnx"], [25, "install-ncnn-and-pnnx"], [26, "install-ncnn-and-pnnx"]], "3. Export the model via torch.jit.trace()": [[24, "export-the-model-via-torch-jit-trace"], [25, "export-the-model-via-torch-jit-trace"], [26, "export-the-model-via-torch-jit-trace"]], "4. 
Export torchscript model via pnnx": [[24, "export-torchscript-model-via-pnnx"], [25, "export-torchscript-model-via-pnnx"], [26, "export-torchscript-model-via-pnnx"]], "5. Test the exported models in icefall": [[24, "test-the-exported-models-in-icefall"], [25, "test-the-exported-models-in-icefall"], [26, "test-the-exported-models-in-icefall"]], "6. Modify the exported encoder for sherpa-ncnn": [[24, "modify-the-exported-encoder-for-sherpa-ncnn"], [25, "modify-the-exported-encoder-for-sherpa-ncnn"], [26, "modify-the-exported-encoder-for-sherpa-ncnn"]], "7. (Optional) int8 quantization with sherpa-ncnn": [[24, "optional-int8-quantization-with-sherpa-ncnn"], [25, "optional-int8-quantization-with-sherpa-ncnn"]], "Export LSTM transducer models to ncnn": [[25, "export-lstm-transducer-models-to-ncnn"]], "Export streaming Zipformer transducer models to ncnn": [[26, "export-streaming-zipformer-transducer-models-to-ncnn"]], "Export to ONNX": [[27, "export-to-onnx"]], "sherpa-onnx": [[27, "sherpa-onnx"]], "Example": [[27, "example"]], "Download the pre-trained model": [[27, "download-the-pre-trained-model"], [33, "download-the-pre-trained-model"], [35, "download-the-pre-trained-model"], [36, "download-the-pre-trained-model"], [38, "download-the-pre-trained-model"], [42, "download-the-pre-trained-model"], [46, "download-the-pre-trained-model"], [47, "download-the-pre-trained-model"], [49, "download-the-pre-trained-model"]], "Export the model to ONNX": [[27, "export-the-model-to-onnx"]], "Decode sound files with exported ONNX models": [[27, "decode-sound-files-with-exported-onnx-models"]], "Export model with torch.jit.script()": [[28, "export-model-with-torch-jit-script"]], "Export model with torch.jit.trace()": [[29, "export-model-with-torch-jit-trace"]], "How to use the exported models": [[29, "how-to-use-the-exported-models"]], "Model export": [[30, "model-export"]], "Finetune from a supervised pre-trained Zipformer model": [[31, 
"finetune-from-a-supervised-pre-trained-zipformer-model"]], "Model preparation": [[31, "model-preparation"]], "Fine-tune": [[31, "fine-tune"]], "Fine-tune a pre-trained model": [[32, "fine-tune-a-pre-trained-model"]], "Table of Contents": [[32, null], [61, null]], "Conformer CTC": [[33, "conformer-ctc"], [38, "conformer-ctc"]], "Configurable options": [[33, "configurable-options"], [36, "configurable-options"], [38, "configurable-options"], [41, "configurable-options"], [43, "configurable-options"], [44, "configurable-options"], [55, "configurable-options"], [56, "configurable-options"], [57, "configurable-options"]], "Pre-configured options": [[33, "pre-configured-options"], [36, "pre-configured-options"], [38, "pre-configured-options"], [41, "pre-configured-options"], [43, "pre-configured-options"], [44, "pre-configured-options"], [55, "pre-configured-options"], [56, "pre-configured-options"], [57, "pre-configured-options"]], "Training logs": [[33, "training-logs"], [35, "training-logs"], [36, "training-logs"], [38, "training-logs"], [41, "training-logs"], [43, "training-logs"], [44, "training-logs"], [55, "training-logs"], [56, "training-logs"], [57, "training-logs"]], "Usage examples": [[33, "usage-examples"], [35, "usage-examples"], [36, "usage-examples"], [38, "usage-examples"]], "Case 1": [[33, "case-1"], [35, "case-1"], [36, "case-1"], [38, "case-1"]], "Case 2": [[33, "case-2"], [35, "case-2"], [36, "case-2"], [38, "case-2"]], "Case 3": [[33, "case-3"], [35, "case-3"], [38, "case-3"]], "Pre-trained Model": [[33, "pre-trained-model"], [35, "pre-trained-model"], [36, "pre-trained-model"], [38, "pre-trained-model"], [42, "pre-trained-model"], [46, "pre-trained-model"], [47, "pre-trained-model"], [49, "pre-trained-model"]], "Install kaldifeat": [[33, "install-kaldifeat"], [35, "install-kaldifeat"], [36, "install-kaldifeat"], [38, "install-kaldifeat"], [42, "install-kaldifeat"], [46, "install-kaldifeat"], [47, "install-kaldifeat"]], "Usage": [[33, "usage"], [35, 
"usage"], [36, "usage"], [38, "usage"]], "CTC decoding": [[33, "ctc-decoding"], [38, "ctc-decoding"], [38, "id2"]], "HLG decoding": [[33, "hlg-decoding"], [33, "id2"], [36, "hlg-decoding"], [38, "hlg-decoding"], [38, "id3"]], "HLG decoding + attention decoder rescoring": [[33, "hlg-decoding-attention-decoder-rescoring"]], "Colab notebook": [[33, "colab-notebook"], [35, "colab-notebook"], [36, "colab-notebook"], [38, "colab-notebook"], [42, "colab-notebook"], [46, "colab-notebook"], [47, "colab-notebook"], [49, "colab-notebook"]], "Deployment with C++": [[33, "deployment-with-c"], [38, "deployment-with-c"]], "aishell": [[34, "aishell"]], "Stateless Transducer": [[35, "stateless-transducer"]], "The Model": [[35, "the-model"]], "The Loss": [[35, "the-loss"]], "Todo": [[35, "id1"]], "Greedy search": [[35, "greedy-search"]], "Beam search": [[35, "beam-search"]], "Modified Beam search": [[35, "modified-beam-search"]], "TDNN-LSTM CTC": [[36, "tdnn-lstm-ctc"]], "Non Streaming ASR": [[37, "non-streaming-asr"]], "HLG decoding + LM rescoring": [[38, "hlg-decoding-lm-rescoring"]], "HLG decoding + LM rescoring + attention decoder rescoring": [[38, "hlg-decoding-lm-rescoring-attention-decoder-rescoring"]], "Compute WER with the pre-trained model": [[38, "compute-wer-with-the-pre-trained-model"]], "HLG decoding + n-gram LM rescoring": [[38, "hlg-decoding-n-gram-lm-rescoring"]], "HLG decoding + n-gram LM rescoring + attention decoder rescoring": [[38, "hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring"]], "Distillation with HuBERT": [[39, "distillation-with-hubert"]], "Codebook index preparation": [[39, "codebook-index-preparation"]], "LibriSpeech": [[40, "librispeech"], [54, "librispeech"]], "Pruned transducer statelessX": [[41, "pruned-transducer-statelessx"], [56, "pruned-transducer-statelessx"]], "Usage example": [[41, "usage-example"], [43, "usage-example"], [44, "usage-example"], [55, "usage-example"], [56, "usage-example"], [57, "usage-example"]], "Export Model": 
[[41, "export-model"], [56, "export-model"], [57, "export-model"]], "Export model using torch.jit.script()": [[41, "export-model-using-torch-jit-script"], [43, "export-model-using-torch-jit-script"], [44, "export-model-using-torch-jit-script"], [56, "export-model-using-torch-jit-script"], [57, "export-model-using-torch-jit-script"]], "Download pretrained models": [[41, "download-pretrained-models"], [43, "download-pretrained-models"], [44, "download-pretrained-models"], [55, "download-pretrained-models"], [56, "download-pretrained-models"], [57, "download-pretrained-models"], [59, "download-pretrained-models"], [60, "download-pretrained-models"]], "Deploy with Sherpa": [[41, "deploy-with-sherpa"], [56, "deploy-with-sherpa"], [57, "deploy-with-sherpa"]], "TDNN-LSTM-CTC": [[42, "tdnn-lstm-ctc"], [47, "tdnn-lstm-ctc"]], "Inference with a pre-trained model": [[42, "inference-with-a-pre-trained-model"], [46, "inference-with-a-pre-trained-model"], [47, "inference-with-a-pre-trained-model"], [49, "inference-with-a-pre-trained-model"]], "Zipformer CTC Blank Skip": [[43, "zipformer-ctc-blank-skip"]], "Export models": [[43, "export-models"], [44, "export-models"], [55, "export-models"], [59, "export-models"], [60, "export-models"]], "Zipformer MMI": [[44, "zipformer-mmi"]], "TIMIT": [[45, "timit"]], "TDNN-LiGRU-CTC": [[46, "tdnn-ligru-ctc"]], "YesNo": [[48, "yesno"]], "TDNN-CTC": [[49, "tdnn-ctc"]], "Download kaldifeat": [[49, "download-kaldifeat"]], "RNN-LM": [[50, "rnn-lm"]], "Train an RNN language model": [[51, "train-an-rnn-language-model"]], "Streaming ASR": [[52, "streaming-asr"]], "Streaming Conformer": [[53, "streaming-conformer"]], "Streaming Emformer": [[53, "streaming-emformer"]], "LSTM Transducer": [[55, "lstm-transducer"]], "Which model to use": [[55, "which-model-to-use"]], "Export model using torch.jit.trace()": [[55, "export-model-using-torch-jit-trace"], [57, "export-model-using-torch-jit-trace"]], "Simulate streaming decoding": [[56, 
"simulate-streaming-decoding"], [57, "simulate-streaming-decoding"]], "Real streaming decoding": [[56, "real-streaming-decoding"], [57, "real-streaming-decoding"]], "Zipformer Transducer": [[57, "zipformer-transducer"]], "TTS": [[58, "tts"]], "VITS-LJSpeech": [[59, "vits-ljspeech"]], "Build Monotonic Alignment Search": [[59, "build-monotonic-alignment-search"], [60, "build-monotonic-alignment-search"]], "Inference": [[59, "inference"], [60, "inference"]], "VITS-VCTK": [[60, "vits-vctk"]], "Recipes": [[61, "recipes"]]}, "indexentries": {}})
\ No newline at end of file
+Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "decoding-with-langugage-models/LODR", "decoding-with-langugage-models/index", "decoding-with-langugage-models/rescoring", "decoding-with-langugage-models/shallow-fusion", "docker/index", "docker/intro", "faqs", "for-dummies/data-preparation", "for-dummies/decoding", "for-dummies/environment-setup", "for-dummies/index", "for-dummies/model-export", "for-dummies/training", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "model-export/export-model-state-dict", "model-export/export-ncnn", "model-export/export-ncnn-conv-emformer", "model-export/export-ncnn-lstm", "model-export/export-ncnn-zipformer", "model-export/export-onnx", "model-export/export-with-torch-jit-script", "model-export/export-with-torch-jit-trace", "model-export/index", "recipes/Finetune/from_supervised/finetune_zipformer", "recipes/Finetune/index", "recipes/Non-streaming-ASR/aishell/conformer_ctc", "recipes/Non-streaming-ASR/aishell/index", "recipes/Non-streaming-ASR/aishell/stateless_transducer", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/index", "recipes/Non-streaming-ASR/librispeech/conformer_ctc", "recipes/Non-streaming-ASR/librispeech/distillation", "recipes/Non-streaming-ASR/librispeech/index", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi", "recipes/Non-streaming-ASR/timit/index", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/yesno/index", "recipes/Non-streaming-ASR/yesno/tdnn", "recipes/RNN-LM/index", "recipes/RNN-LM/librispeech/lm-training", "recipes/Streaming-ASR/index", 
"recipes/Streaming-ASR/introduction", "recipes/Streaming-ASR/librispeech/index", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Streaming-ASR/librispeech/zipformer_transducer", "recipes/TTS/index", "recipes/TTS/ljspeech/vits", "recipes/TTS/vctk/vits", "recipes/index"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "decoding-with-langugage-models/LODR.rst", "decoding-with-langugage-models/index.rst", "decoding-with-langugage-models/rescoring.rst", "decoding-with-langugage-models/shallow-fusion.rst", "docker/index.rst", "docker/intro.rst", "faqs.rst", "for-dummies/data-preparation.rst", "for-dummies/decoding.rst", "for-dummies/environment-setup.rst", "for-dummies/index.rst", "for-dummies/model-export.rst", "for-dummies/training.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "model-export/export-model-state-dict.rst", "model-export/export-ncnn.rst", "model-export/export-ncnn-conv-emformer.rst", "model-export/export-ncnn-lstm.rst", "model-export/export-ncnn-zipformer.rst", "model-export/export-onnx.rst", "model-export/export-with-torch-jit-script.rst", "model-export/export-with-torch-jit-trace.rst", "model-export/index.rst", "recipes/Finetune/from_supervised/finetune_zipformer.rst", "recipes/Finetune/index.rst", "recipes/Non-streaming-ASR/aishell/conformer_ctc.rst", "recipes/Non-streaming-ASR/aishell/index.rst", "recipes/Non-streaming-ASR/aishell/stateless_transducer.rst", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/index.rst", "recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst", "recipes/Non-streaming-ASR/librispeech/distillation.rst", "recipes/Non-streaming-ASR/librispeech/index.rst", 
"recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi.rst", "recipes/Non-streaming-ASR/timit/index.rst", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.rst", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/yesno/index.rst", "recipes/Non-streaming-ASR/yesno/tdnn.rst", "recipes/RNN-LM/index.rst", "recipes/RNN-LM/librispeech/lm-training.rst", "recipes/Streaming-ASR/index.rst", "recipes/Streaming-ASR/introduction.rst", "recipes/Streaming-ASR/librispeech/index.rst", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Streaming-ASR/librispeech/zipformer_transducer.rst", "recipes/TTS/index.rst", "recipes/TTS/ljspeech/vits.rst", "recipes/TTS/vctk/vits.rst", "recipes/index.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "LODR for RNN Transducer", "Decoding with language models", "LM rescoring for Transducer", "Shallow fusion for Transducer", "Docker", "Introduction", "Frequently Asked Questions (FAQs)", "Data Preparation", "Decoding", "Environment setup", "Icefall for dummies tutorial", "Model Export", "Training", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Export model.state_dict()", "Export to ncnn", "Export ConvEmformer transducer models to ncnn", "Export LSTM transducer models to ncnn", "Export streaming Zipformer transducer models to ncnn", "Export to ONNX", "Export model with torch.jit.script()", "Export model with torch.jit.trace()", "Model export", "Finetune from a supervised pre-trained Zipformer model", "Fine-tune a pre-trained model", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Non Streaming ASR", 
"Conformer CTC", "Distillation with HuBERT", "LibriSpeech", "Pruned transducer statelessX", "TDNN-LSTM-CTC", "Zipformer CTC Blank Skip", "Zipformer MMI", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC", "RNN-LM", "Train an RNN language model", "Streaming ASR", "Introduction", "LibriSpeech", "LSTM Transducer", "Pruned transducer statelessX", "Zipformer Transducer", "TTS", "VITS-LJSpeech", "VITS-VCTK", "Recipes"], "terms": {"we": [0, 1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60, 61], "us": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 23, 24, 25, 26, 27, 30, 31, 33, 34, 35, 36, 38, 39, 42, 46, 47, 49, 51, 53, 59, 60], "tool": [0, 10, 21, 24], "make": [0, 1, 3, 24, 25, 26, 33, 35, 38, 53], "consist": [0, 35, 41, 55, 56, 57], "possibl": [0, 2, 3, 33, 38], "black": 0, "format": [0, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "flake8": 0, "check": [0, 21, 38, 51], "qualiti": [0, 34], "isort": 0, "sort": [0, 21, 51], "import": [0, 9, 10, 15, 21, 24, 56, 57], "The": [0, 1, 2, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 19, 21, 22, 24, 25, 26, 31, 33, 34, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "version": [0, 9, 13, 15, 20, 21, 22, 24, 25, 26, 33, 35, 36, 38, 41, 42, 46, 47, 56], "abov": [0, 4, 6, 7, 10, 13, 15, 22, 24, 25, 26, 27, 33, 34, 35, 36, 38, 41, 43, 44, 49, 53, 55, 56, 57], "ar": [0, 1, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 21, 22, 24, 25, 26, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "22": [0, 9, 15, 21, 24, 25, 38, 46, 47, 49], "3": [0, 4, 6, 7, 9, 10, 11, 15, 20, 22, 23, 27, 30, 36, 39, 41, 42, 43, 44, 49, 51, 55, 56, 57, 59, 60], "0": [0, 1, 4, 6, 7, 9, 11, 13, 15, 20, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "5": [0, 7, 15, 23, 
30, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59], "4": [0, 4, 5, 6, 7, 9, 10, 11, 13, 15, 20, 22, 23, 30, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "10": [0, 7, 9, 15, 20, 21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "1": [0, 4, 6, 7, 9, 11, 13, 15, 20, 22, 23, 27, 28, 29, 30, 31, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "after": [0, 1, 6, 9, 11, 12, 13, 16, 19, 21, 22, 24, 25, 26, 31, 32, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57], "run": [0, 2, 8, 10, 11, 13, 14, 15, 19, 20, 21, 24, 25, 26, 27, 30, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "command": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 21, 22, 24, 25, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 60], "git": [0, 4, 6, 7, 9, 13, 15, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 42, 46, 47, 49, 51], "clone": [0, 4, 6, 7, 9, 13, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 42, 46, 47, 49, 51], "http": [0, 1, 2, 4, 6, 7, 9, 10, 11, 13, 15, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "github": [0, 2, 6, 9, 11, 13, 15, 18, 21, 22, 23, 24, 25, 26, 27, 28, 29, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "com": [0, 2, 6, 9, 11, 13, 18, 19, 21, 22, 24, 25, 28, 29, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "k2": [0, 2, 9, 10, 13, 15, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 55, 56, 57], "fsa": [0, 2, 9, 13, 15, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 33, 35, 38, 41, 43, 44, 55, 56, 57], "icefal": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16, 18, 19, 22, 23, 27, 28, 29, 30, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60, 61], "cd": [0, 1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 21, 22, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 
43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "pip": [0, 1, 6, 10, 13, 15, 21, 24, 27, 35], "instal": [0, 1, 4, 6, 10, 14, 15, 17, 19, 20, 22, 23, 27, 30, 31, 39, 41, 43, 44, 49, 55, 56, 57], "pre": [0, 3, 4, 6, 7, 8, 9, 15, 17, 19, 20, 21, 23, 30, 39, 61], "commit": [0, 21], "whenev": 0, "you": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "automat": [0, 14, 19, 39], "hook": 0, "invok": 0, "fail": 0, "If": [0, 2, 4, 6, 7, 8, 9, 10, 11, 13, 15, 19, 24, 25, 26, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "ani": [0, 4, 6, 7, 13, 21, 33, 35, 36, 38, 39, 41, 43, 44, 49, 55, 56], "your": [0, 1, 2, 4, 6, 7, 9, 11, 13, 17, 19, 20, 24, 25, 26, 27, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "wa": [0, 22, 38, 42], "success": [0, 21, 24, 25], "pleas": [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 13, 14, 15, 19, 21, 23, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "fix": [0, 9, 10, 13, 24, 25, 26, 38], "issu": [0, 4, 6, 7, 10, 21, 24, 25, 38, 39, 56, 57], "report": [0, 9, 10, 39], "some": [0, 1, 4, 6, 9, 22, 24, 25, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "i": [0, 1, 2, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 19, 21, 22, 23, 24, 25, 26, 27, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57], "e": [0, 2, 4, 5, 6, 7, 13, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "modifi": [0, 23, 30, 33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "file": [0, 2, 9, 14, 15, 19, 20, 22, 24, 25, 26, 28, 29, 30, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "place": [0, 21, 22, 35, 38, 42], "so": [0, 4, 6, 7, 9, 13, 19, 20, 21, 22, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "statu": 0, 
"failur": 0, "see": [0, 1, 6, 7, 9, 15, 19, 21, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "which": [0, 2, 4, 6, 7, 9, 11, 12, 15, 19, 21, 22, 24, 25, 26, 27, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 56, 57], "ha": [0, 2, 20, 21, 23, 24, 25, 26, 27, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 53, 55, 56, 57], "been": [0, 21, 23, 24, 25, 26, 35], "befor": [0, 1, 11, 13, 15, 21, 22, 24, 25, 26, 27, 28, 31, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "further": [0, 4, 6, 7, 15], "chang": [0, 4, 6, 7, 10, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "all": [0, 9, 11, 13, 14, 18, 19, 22, 24, 25, 26, 28, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "again": [0, 24, 25, 49], "should": [0, 2, 4, 6, 11, 13, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "succe": 0, "thi": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60, 61], "time": [0, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "succeed": 0, "want": [0, 4, 6, 7, 11, 13, 15, 21, 22, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59, 60], "can": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "do": [0, 2, 4, 6, 13, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "Or": 0, "without": [0, 4, 6, 7, 9, 15, 17, 19, 33, 38], "your_changed_fil": 0, "py": [0, 2, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 21, 24, 25, 26, 27, 28, 29, 30, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "sphinx": 1, "write": [1, 2, 3], "have": [1, 2, 4, 6, 7, 8, 9, 11, 13, 18, 19, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 39, 41, 
42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "prepar": [1, 3, 4, 8, 14, 16, 20, 22, 32, 58], "environ": [1, 10, 11, 12, 14, 16, 20, 24, 25, 26, 31, 33, 34, 35, 36, 38, 39, 41, 42, 46, 47, 49, 56, 57], "doc": [1, 22, 53], "r": [1, 13, 21, 24, 25, 26, 46, 47], "requir": [1, 4, 6, 11, 13, 15, 21, 26, 31, 39, 51, 56, 57, 59, 60], "txt": [1, 4, 9, 11, 13, 15, 21, 22, 24, 25, 26, 27, 28, 29, 33, 35, 36, 38, 42, 46, 47, 49, 51, 59, 60], "set": [1, 4, 6, 7, 10, 12, 13, 16, 21, 24, 25, 26, 31, 32, 33, 35, 36, 38, 39, 41, 43, 44, 49, 51, 55, 56, 57], "up": [1, 21, 22, 24, 25, 26, 33, 36, 38, 39, 41, 42, 43, 44, 56, 57], "readi": [1, 33, 38, 39, 51], "refer": [1, 2, 5, 6, 7, 11, 13, 15, 21, 22, 23, 24, 25, 26, 28, 29, 33, 35, 36, 38, 41, 42, 43, 46, 47, 49, 51, 53, 56, 57], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 9, 15, 21, 22, 24, 25, 26, 33, 35, 38, 58], "local": [1, 9, 15, 21, 41, 43, 44, 51, 55, 56, 57], "preview": 1, "what": [1, 2, 11, 15, 21, 24, 25, 26, 35, 53], "look": [1, 2, 4, 6, 7, 14, 18, 21, 24, 25, 26, 33, 35, 36, 38, 39], "like": [1, 2, 9, 11, 19, 24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 49, 53, 55, 56], "publish": [1, 22, 34], "html": [1, 2, 10, 11, 13, 15, 21, 23, 24, 25, 26, 27, 28, 29, 41, 55, 56, 57], "gener": [1, 6, 9, 14, 15, 22, 24, 25, 26, 27, 28, 29, 32, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57, 59, 60], "view": [1, 8, 20, 24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56, 57], "follow": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "python3": [1, 9, 10, 13, 15, 21, 25, 26], "m": [1, 15, 21, 24, 25, 26, 35, 41, 43, 44, 46, 47, 55, 56, 57], "server": [1, 19, 55], "It": [1, 2, 6, 7, 9, 11, 14, 15, 17, 21, 23, 24, 25, 26, 27, 28, 29, 33, 34, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59, 60], "print": [1, 12, 16, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], 
"serv": [1, 41, 43, 44, 55, 56, 57], "port": [1, 14, 31, 39, 41, 43, 44, 55, 56, 57], "8000": [1, 11, 15, 49], "open": [1, 4, 6, 7, 9, 20, 22, 24, 25, 26, 34, 35, 38, 39], "browser": [1, 17, 19, 41, 43, 44, 55, 56, 57], "go": [1, 7, 33, 35, 38, 41, 43, 44, 55, 56, 57], "read": [2, 11, 15, 21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "code": [2, 3, 8, 10, 13, 15, 20, 21, 24, 25, 26, 33, 38, 39, 41, 42, 46, 47, 49, 53, 56, 57], "style": [2, 3, 20], "adjust": [2, 51, 59, 60], "design": 2, "python": [2, 9, 13, 15, 21, 22, 24, 25, 26, 27, 28, 29, 33, 35, 38, 41, 43, 44, 51, 55, 56, 57, 59, 60], "recommend": [2, 6, 7, 9, 21, 31, 33, 35, 36, 38, 39, 41, 56, 57], "test": [2, 4, 9, 15, 20, 22, 23, 30, 31, 33, 35, 36, 38, 39, 42, 43, 46, 47, 51, 59, 60], "valid": [2, 21, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "dataset": [2, 10, 11, 13, 14, 21, 22, 31, 32, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59, 60], "lhots": [2, 9, 11, 13, 15, 20, 22, 24, 25, 26, 33, 35, 38], "readthedoc": [2, 11, 21], "io": [2, 9, 11, 13, 15, 21, 23, 24, 25, 26, 27, 28, 29, 41, 55, 56, 57], "en": [2, 11, 21, 24], "latest": [2, 9, 11, 13, 19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "index": [2, 21, 23, 24, 25, 26, 27, 28, 29, 55, 56, 57], "yesno": [2, 8, 10, 11, 12, 13, 14, 15, 16, 20, 21, 37, 49, 61], "veri": [2, 3, 7, 13, 24, 25, 26, 31, 35, 46, 47, 49, 56, 57], "good": [2, 7], "exampl": [2, 11, 13, 19, 20, 22, 24, 25, 26, 28, 29, 30, 39, 42, 46, 47, 49], "speech": [2, 11, 13, 14, 19, 20, 21, 23, 32, 34, 35, 49, 59, 60, 61], "pull": [2, 4, 6, 7, 9, 24, 25, 26, 27, 31, 33, 35, 38, 51, 53], "380": [2, 24, 47], "show": [2, 4, 6, 7, 9, 15, 19, 21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "add": [2, 11, 24, 25, 26, 33, 35, 36, 56, 61], "new": [2, 3, 9, 13, 19, 21, 24, 25, 26, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 49, 55, 56, 57], "suppos": [2, 9, 56, 
57], "would": [2, 11, 22, 24, 25, 26, 38, 42, 56, 57], "name": [2, 9, 10, 13, 15, 22, 24, 25, 26, 27, 33, 35, 41, 43, 44, 51, 56, 57], "foo": [2, 29, 33, 38, 41, 43, 44, 55, 56, 57], "eg": [2, 9, 10, 11, 12, 15, 16, 18, 21, 22, 24, 25, 26, 27, 28, 29, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "mkdir": [2, 9, 24, 25, 33, 35, 36, 38, 42, 46, 47, 49], "p": [2, 4, 13, 21, 24, 25, 35, 46, 47], "asr": [2, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 29, 31, 32, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 61], "touch": 2, "sh": [2, 9, 11, 21, 22, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "chmod": 2, "x": [2, 4, 26, 53], "simpl": [2, 12, 14, 16, 21, 35, 51], "own": [2, 11, 31, 39, 41, 51, 56, 57], "otherwis": [2, 24, 25, 26, 31, 33, 35, 38, 39, 41, 43, 44, 55, 56, 57], "librispeech": [2, 4, 6, 7, 10, 18, 20, 22, 24, 25, 26, 27, 28, 29, 31, 37, 38, 39, 41, 42, 43, 44, 51, 52, 53, 55, 56, 57, 61], "assum": [2, 4, 15, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 39, 41, 42, 46, 47, 49, 51, 55, 56, 57], "fanci": 2, "call": [2, 10, 27, 39, 51], "bar": [2, 29, 33, 38, 41, 43, 44, 55, 56, 57], "organ": 2, "wai": [2, 3, 15, 30, 41, 43, 44, 53, 55, 56, 57], "readm": [2, 33, 35, 36, 38, 42, 46, 47, 49], "md": [2, 18, 22, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "asr_datamodul": [2, 9, 10, 15, 21], "pretrain": [2, 4, 6, 7, 15, 22, 24, 25, 26, 27, 29, 31, 33, 35, 36, 38, 42, 46, 47, 49, 58], "For": [2, 4, 6, 7, 9, 10, 14, 18, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "instanc": [2, 9, 10, 12, 16, 18, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "tdnn": [2, 9, 10, 12, 15, 16, 21, 34, 37, 40, 45, 48], "its": [2, 4, 22, 23, 24, 25, 26, 29, 35, 43, 51], "directori": [2, 9, 11, 13, 20, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "structur": [2, 26], 
"descript": [2, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "contain": [2, 8, 11, 13, 14, 15, 20, 22, 23, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 61], "inform": [2, 4, 6, 11, 12, 16, 21, 22, 33, 35, 36, 38, 41, 42, 43, 46, 47, 49, 53, 55, 56, 57], "g": [2, 4, 5, 6, 7, 11, 13, 21, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "wer": [2, 5, 9, 12, 15, 21, 22, 31, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "etc": [2, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "provid": [2, 11, 15, 19, 21, 22, 23, 24, 25, 26, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 61], "pytorch": [2, 10, 13, 21, 24, 25, 26, 35], "dataload": [2, 21], "take": [2, 7, 9, 22, 39, 41, 49, 51, 56, 57, 59, 60], "input": [2, 22, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47, 49, 53], "checkpoint": [2, 4, 6, 7, 12, 15, 21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "save": [2, 15, 16, 21, 22, 25, 26, 28, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "dure": [2, 4, 5, 7, 10, 13, 19, 22, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "stage": [2, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "": [2, 4, 6, 7, 9, 14, 15, 16, 21, 22, 24, 25, 26, 27, 28, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "definit": [2, 24, 25], "neural": [2, 4, 6, 7, 33, 38, 51], "network": [2, 33, 35, 38, 41, 43, 44, 51, 55, 56, 57], "script": [2, 6, 7, 13, 14, 20, 21, 29, 30, 33, 35, 36, 38, 39, 42, 46, 47, 49, 51, 55], "infer": [2, 22, 24, 25, 58], "tdnn_lstm_ctc": [2, 36, 42, 47], "conformer_ctc": [2, 33, 38], "get": [2, 9, 13, 14, 15, 19, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 49, 53, 55, 56, 57], "feel": [2, 39, 51, 55], "result": [2, 4, 7, 9, 16, 18, 19, 22, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "everi": [2, 22, 41, 43, 44, 55, 56, 57], 
"kept": [2, 41, 56, 57], "self": [2, 23, 26, 53], "toler": 2, "duplic": 2, "among": [2, 21], "differ": [2, 9, 12, 21, 24, 25, 26, 27, 31, 32, 33, 34, 38, 39, 41, 53, 55, 56, 57], "invoc": [2, 24, 25], "help": [2, 12, 14, 16, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "blob": [2, 11, 18, 21, 22, 29, 41, 43, 44, 55, 56, 57], "master": [2, 6, 9, 11, 15, 18, 21, 22, 25, 26, 28, 29, 31, 35, 39, 41, 43, 44, 55, 56, 57], "transform": [2, 6, 7, 33, 38, 55], "conform": [2, 28, 34, 35, 37, 40, 41, 43, 55, 56, 57], "base": [2, 4, 7, 13, 26, 31, 33, 35, 36, 38, 39, 41, 43, 44, 51, 55, 56, 57], "lstm": [2, 23, 29, 30, 34, 37, 40, 45, 52, 54], "attent": [2, 26, 35, 36, 39, 53, 56, 57], "lm": [2, 4, 5, 7, 9, 11, 20, 21, 35, 41, 42, 46, 47, 49, 51, 56, 57, 61], "rescor": [2, 5, 20, 36, 42, 44, 46, 47, 49, 51], "demonstr": [2, 14, 15, 17, 19, 22, 27], "consid": [2, 4, 26, 31], "colab": [2, 21], "notebook": [2, 21], "welcom": 3, "There": [3, 4, 15, 24, 25, 26, 27, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "mani": [3, 12, 21, 56, 57], "two": [3, 4, 11, 14, 15, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 60], "them": [3, 5, 6, 17, 18, 19, 24, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "To": [3, 4, 5, 6, 7, 9, 11, 15, 19, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "document": [3, 20, 22, 23, 24, 25, 26, 27, 44], "repositori": [3, 9, 24, 25, 26, 27], "recip": [3, 4, 6, 7, 9, 11, 15, 18, 20, 21, 22, 27, 31, 33, 35, 36, 38, 39, 41, 42, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "In": [3, 4, 6, 10, 15, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 32, 33, 35, 36, 38, 39, 42, 46, 47, 49, 53], "page": [3, 19, 28, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 61], "describ": [3, 5, 8, 9, 17, 22, 24, 25, 27, 28, 29, 30, 33, 35, 36, 38, 41, 42, 46, 47, 56, 57], "how": [3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 17, 19, 20, 21, 24, 25, 26, 27, 30, 31, 33, 35, 36, 38, 39, 
41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "creat": [3, 4, 6, 7, 14, 15, 20, 22, 24, 25, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56], "data": [3, 4, 6, 7, 8, 13, 14, 15, 16, 20, 22, 24, 25, 26, 27, 28, 29, 32, 34, 51, 58], "train": [3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 17, 19, 20, 22, 23, 28, 29, 30, 50, 53, 58, 61], "decod": [3, 4, 8, 10, 11, 14, 15, 19, 20, 24, 25, 26, 29, 30, 31, 51], "model": [3, 4, 6, 7, 9, 11, 12, 14, 17, 19, 20, 21, 23, 39, 50, 53, 58, 61], "As": [4, 5, 6, 7, 24, 35, 38, 39, 51], "type": [4, 6, 7, 9, 11, 15, 21, 22, 24, 25, 26, 33, 35, 38, 41, 43, 44, 49, 53, 55, 56, 57, 59], "e2": [4, 7, 21, 51], "usual": [4, 6, 7, 12, 33, 35, 36, 38, 39, 41, 43, 44, 51, 55, 56, 57, 59, 60], "an": [4, 5, 6, 7, 9, 11, 13, 15, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 33, 34, 35, 38, 39, 41, 44, 49, 50, 55, 56, 57, 59, 60, 61], "intern": [4, 5], "languag": [4, 7, 11, 19, 20, 33, 35, 36, 50, 61], "learn": [4, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "level": [4, 5, 15, 51], "corpu": [4, 6, 7, 34, 51], "real": 4, "life": 4, "scenario": 4, "often": [4, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "mismatch": [4, 31, 56], "between": [4, 7, 41, 56, 57], "target": [4, 19, 21], "space": [4, 17, 20, 51], "problem": [4, 6, 7, 21, 39], "when": [4, 6, 9, 10, 15, 19, 24, 25, 26, 30, 35, 38, 39, 41, 43, 44, 51, 56, 57], "act": 4, "against": [4, 21], "extern": [4, 5, 6, 7], "tutori": [4, 5, 6, 7, 13, 15, 20, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 51, 55, 56, 57, 59, 60], "low": [4, 24, 25, 32, 59], "order": [4, 13, 21, 24, 25, 26, 33, 36, 38, 42, 46, 47], "densiti": 4, "ratio": 4, "allevi": 4, "effect": [4, 7, 26], "improv": [4, 5, 6, 7, 32, 35, 51], "perform": [4, 6, 7, 23, 31, 32, 35, 39, 56], "languga": 4, "integr": [4, 19], "pruned_transducer_stateless7_stream": [4, 6, 7, 26, 27, 57], "stream": [4, 6, 7, 15, 20, 23, 24, 25, 27, 30, 33, 38, 46, 47, 55, 61], "howev": [4, 6, 7, 22, 25, 32, 39], 
"easili": [4, 6, 7, 33, 36, 38], "appli": [4, 6, 7, 35, 53], "other": [4, 7, 9, 13, 14, 15, 22, 25, 26, 27, 35, 38, 39, 41, 42, 46, 47, 49, 53, 56, 57, 61], "encount": [4, 6, 7, 10, 21, 26, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "here": [4, 6, 7, 22, 24, 25, 26, 33, 35, 36, 38, 39, 42, 53, 56], "simplic": [4, 6, 7], "same": [4, 6, 7, 21, 22, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "domain": [4, 6, 7, 31, 32], "gigaspeech": [4, 6, 7, 18, 28, 31, 55], "first": [4, 6, 9, 10, 11, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "let": [4, 6, 7, 14, 21, 24, 25, 26, 31, 33, 38, 51], "background": 4, "predecessor": 4, "dr": 4, "propos": [4, 35, 53, 57], "address": [4, 9, 15, 19, 21, 22, 24, 25, 26, 35, 41, 44, 55, 56, 57], "sourc": [4, 11, 13, 21, 22, 24, 25, 26, 33, 34, 35, 38], "acoust": [4, 56, 57], "similar": [4, 5, 31, 39, 43, 56, 57], "deriv": 4, "formula": 4, "bay": 4, "theorem": 4, "text": [4, 6, 7, 11, 16, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "score": [4, 5, 7, 33, 38, 41, 56, 57], "left": [4, 24, 26, 35, 56, 57], "y_u": 4, "mathit": 4, "y": 4, "right": [4, 24, 35, 53, 56], "log": [4, 9, 10, 12, 15, 16, 21, 24, 25, 26, 42, 46, 47, 49, 59, 60], "y_": 4, "u": [4, 21, 24, 25, 26, 33, 35, 36, 38, 39, 49], "lambda_1": 4, "p_": 4, "lambda_2": 4, "where": [4, 9, 10, 56], "weight": [4, 15, 33, 36, 38, 43, 44, 51, 55], "respect": 4, "onli": [4, 6, 8, 11, 13, 14, 15, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59, 60], "compar": [4, 24, 25, 26, 31, 56], "shallow": [4, 5, 20, 51], "fusion": [4, 5, 20, 51], "subtract": [4, 5], "work": [4, 9, 13, 15, 24, 25, 26, 38], "treat": [4, 25, 26], "predictor": 4, "joiner": [4, 24, 25, 26, 27, 29, 31, 35, 41, 55, 56, 57], "weak": 4, "captur": 4, "therefor": [4, 10], "n": [4, 5, 6, 11, 21, 33, 39, 41, 43, 44, 46, 47, 55, 56, 57], "gram": [4, 6, 21, 33, 35, 36, 41, 42, 44, 
46, 47, 56, 57], "approxim": [4, 5], "ilm": 4, "lead": [4, 7, 12], "rnnt": [4, 41, 56, 57], "bi": [4, 6], "addit": [4, 32], "estim": 4, "li": 4, "choic": 4, "accord": [4, 51], "origin": [4, 5, 31, 32], "paper": [4, 5, 39, 41, 55, 56, 57, 59, 60], "achiev": [4, 6, 7, 31, 51, 53], "both": [4, 31, 41, 43, 44, 53, 55, 56, 57], "intra": 4, "cross": 4, "much": [4, 24, 25, 31], "faster": [4, 6, 59], "evalu": 4, "now": [4, 6, 9, 13, 15, 21, 24, 25, 26, 33, 38, 39, 41, 42, 43, 44, 46, 47, 51, 55, 56, 57], "illustr": [4, 6, 7, 31, 51], "purpos": [4, 6, 7, 24, 25, 31, 51], "from": [4, 6, 7, 9, 10, 11, 14, 15, 17, 19, 20, 21, 22, 24, 25, 26, 27, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60, 61], "link": [4, 6, 7, 18, 21, 22, 23, 41, 43, 44, 55, 56, 57, 59, 60], "scratch": [4, 6, 7, 31, 41, 43, 44, 51, 55, 56, 57, 59, 60], "prune": [4, 6, 7, 22, 26, 27, 35, 37, 39, 40, 52, 53, 54, 55, 57], "statelessx": [4, 6, 7, 37, 39, 40, 52, 53, 54], "initi": [4, 6, 7, 9, 31, 33, 36], "step": [4, 6, 7, 11, 14, 21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 43, 44, 49, 51, 55, 56, 57], "download": [4, 6, 7, 8, 10, 13, 15, 19, 20, 23, 30, 31, 34, 39, 51, 58], "git_lfs_skip_smudg": [4, 6, 7, 24, 25, 26, 27, 31, 51], "huggingfac": [4, 6, 7, 18, 20, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 42, 43, 44, 46, 47, 49, 51, 55, 59, 60], "co": [4, 6, 7, 18, 19, 21, 22, 24, 25, 26, 27, 31, 33, 34, 35, 36, 38, 42, 43, 44, 46, 47, 49, 51, 55, 59, 60], "zengwei": [4, 6, 7, 24, 26, 27, 31, 44, 51, 55, 59], "stateless7": [4, 6, 7, 26, 27], "2022": [4, 6, 7, 22, 24, 25, 26, 27, 35, 41, 43, 44, 55, 56], "12": [4, 6, 7, 9, 14, 21, 22, 24, 25, 26, 27, 33, 35, 36, 38, 41, 43, 44, 46, 49, 55, 56, 57, 59, 60], "29": [4, 6, 7, 21, 26, 27, 33, 35, 36, 38, 42, 43, 46, 47], "exp": [4, 6, 7, 9, 15, 16, 21, 22, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "lf": [4, 6, 7, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 42, 44, 46, 47, 
49, 51], "includ": [4, 6, 7, 24, 25, 26, 27, 31, 41, 43, 44, 51, 55, 56, 57], "pt": [4, 6, 7, 9, 11, 15, 21, 22, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "ln": [4, 6, 7, 9, 15, 22, 24, 25, 26, 27, 31, 33, 38, 41, 43, 44, 51, 55, 56, 57], "epoch": [4, 6, 7, 9, 12, 15, 16, 21, 22, 24, 25, 26, 27, 28, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "99": [4, 6, 7, 15, 21, 24, 25, 26, 27, 31], "symbol": [4, 5, 6, 7, 21, 35, 41, 56, 57], "load": [4, 6, 7, 9, 15, 21, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "lang_bpe_500": [4, 6, 7, 22, 24, 25, 26, 27, 28, 29, 31, 38, 41, 43, 44, 51, 55, 56, 57], "bpe": [4, 5, 6, 7, 22, 24, 25, 26, 27, 29, 31, 38, 41, 43, 44, 51, 55, 56, 57], "done": [4, 6, 7, 9, 13, 15, 21, 22, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "via": [4, 6, 7, 14, 21, 23, 28, 29, 30, 31, 51], "exp_dir": [4, 6, 7, 9, 15, 21, 24, 25, 26, 35, 38, 39, 41, 43, 44, 56, 57], "avg": [4, 6, 7, 9, 12, 15, 21, 22, 24, 25, 26, 27, 28, 29, 31, 35, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "averag": [4, 6, 7, 9, 12, 15, 21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "fals": [4, 6, 7, 9, 15, 21, 22, 24, 25, 26, 31, 33, 35, 38, 39], "dir": [4, 6, 7, 22, 24, 25, 26, 27, 28, 29, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "max": [4, 6, 7, 21, 22, 24, 25, 31, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57, 59, 60], "durat": [4, 6, 7, 11, 22, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "600": [4, 6, 7, 21, 22, 38, 41, 43, 55, 56, 57], "chunk": [4, 6, 7, 24, 26, 27, 56, 57], "len": [4, 6, 7, 26, 27, 57], "32": [4, 6, 7, 21, 24, 25, 26, 27, 33, 35, 36, 57], "method": [4, 5, 7, 15, 19, 22, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 55, 56, 57], "modified_beam_search": [4, 5, 6, 7, 19, 35, 39, 41, 43, 55, 56, 57], "clean": [4, 9, 15, 21, 26, 33, 35, 38, 39, 
41, 42, 43, 44, 55, 56, 57], "beam_size_4": [4, 6, 7], "11": [4, 6, 7, 9, 10, 11, 15, 21, 24, 25, 27, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "best": [4, 5, 6, 7, 24, 25, 26, 31, 33, 36, 38], "7": [4, 6, 7, 9, 21, 22, 23, 26, 30, 33, 36, 38, 41, 42, 46, 47, 55, 56], "93": [4, 6, 7, 15], "Then": [4, 6], "necessari": [4, 39, 51], "note": [4, 5, 6, 7, 10, 11, 15, 22, 24, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "960": [4, 31, 38, 41, 43, 44, 55, 56, 57], "hour": [4, 13, 31, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "ezerhouni": [4, 6, 7], "pushd": [4, 6, 7, 27], "popd": [4, 6, 7, 27], "marcoyang": [4, 6], "librispeech_bigram": [4, 6], "2gram": [4, 6], "fst": [4, 11, 21, 35, 49], "modified_beam_search_lm_lodr": 4, "lm_dir": [4, 6, 7, 9, 21, 38], "lm_scale": [4, 6, 7], "42": [4, 9, 15, 21, 25, 33, 38, 49], "lodr_scal": 4, "24": [4, 9, 10, 13, 15, 21, 24, 25, 36, 42, 46, 47, 49], "modified_beam_search_lodr": [4, 5, 6], "scale": [4, 6, 7, 24, 25, 33, 38, 39, 42, 44, 46, 47], "embed": [4, 6, 7, 35, 41, 51, 55, 56, 57], "dim": [4, 6, 7, 24, 25, 26, 35, 41, 51, 56], "2048": [4, 6, 7, 22, 24, 25, 26, 35, 51], "hidden": [4, 6, 7, 25, 51, 55], "num": [4, 6, 7, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 43, 44, 51, 55, 56, 57, 59, 60], "layer": [4, 6, 7, 24, 25, 26, 35, 39, 41, 51, 53, 55, 56, 57], "vocab": [4, 6, 7, 38], "500": [4, 6, 7, 22, 24, 25, 26, 35, 38, 44, 55, 59, 60], "token": [4, 11, 22, 24, 25, 26, 27, 28, 29, 33, 35, 36, 38, 42, 46, 47, 49, 51, 59, 60], "ngram": [4, 38, 42, 46, 47], "2": [4, 6, 7, 9, 11, 13, 15, 20, 22, 23, 30, 31, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "extra": [4, 24, 25, 26, 35, 53, 56], "argument": [4, 7, 15, 31, 39, 53], "need": [4, 6, 11, 13, 14, 15, 19, 21, 22, 23, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57], "given": [4, 9, 11, 12, 13, 15, 21, 22, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 56, 57, 59, 60], "specifi": [4, 7, 10, 12, 15, 16, 24, 25, 
26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "neg": [4, 35], "number": [4, 7, 16, 19, 22, 24, 25, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "obtain": [4, 7, 33, 35, 36, 38, 42, 46, 47], "shown": [4, 7], "below": [4, 7, 9, 11, 12, 13, 14, 15, 16, 21, 24, 25, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56], "61": [4, 6], "6": [4, 6, 7, 9, 10, 11, 15, 23, 30, 33, 35, 38, 41, 42, 46, 47, 55, 60], "74": [4, 6, 21, 22], "recal": 4, "lowest": [4, 12, 15, 41, 43, 44, 55, 56, 57], "77": [4, 6, 7, 21, 38], "08": [4, 6, 7, 9, 15, 26, 38, 42, 44, 46, 47, 49, 55], "inde": 4, "even": [4, 19, 21, 25], "better": [4, 6], "increas": [4, 6, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "8": [4, 6, 7, 9, 10, 15, 21, 22, 24, 25, 26, 33, 35, 38, 39, 41, 42, 43, 44, 49, 55, 56, 57], "45": [4, 6, 15, 21, 24, 26, 33, 35, 38], "38": [4, 6, 21, 24, 33, 35, 38, 46], "23": [4, 6, 9, 10, 11, 15, 21, 24, 25, 26, 33, 35, 36, 38, 46, 47, 49], "section": [5, 8, 9, 10, 17, 21, 22, 27, 28, 29, 30, 33, 38], "langugag": 5, "transduc": [5, 20, 22, 23, 27, 30, 31, 34, 37, 39, 40, 51, 52, 53, 54], "rnn": [5, 6, 7, 20, 25, 35, 41, 43, 55, 56, 57, 61], "avail": [5, 6, 8, 15, 20, 21, 22, 24, 25, 26, 31, 32, 33, 35, 38, 42, 46, 47, 49, 55], "beam": [5, 22, 55], "search": [5, 6, 7, 18, 19, 58], "realli": [5, 33, 36, 38, 41, 43, 44, 55, 56, 57], "valu": [5, 7, 24, 25, 26, 31, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "t": [5, 13, 14, 15, 21, 24, 25, 26, 27, 28, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "doe": [5, 15, 24, 25, 26, 33, 35, 38, 49], "modified_beam_search_lm_shallow_fus": [5, 6, 7], "interpol": 5, "also": [5, 6, 7, 11, 13, 14, 15, 17, 18, 21, 22, 23, 24, 25, 26, 27, 29, 33, 35, 36, 38, 41, 43, 44, 49, 51, 53, 55, 56, 57], "known": 5, "bigram": 5, "backoff": 5, "modified_beam_search_lm_rescor": [5, 6], "hypothes": [5, 6], "rnnlm": [5, 6, 51], "re": [5, 6, 10, 33, 36, 38, 39, 41, 43, 44, 53, 55, 56, 57], "rank": [5, 
6], "modified_beam_search_lm_rescore_lodr": [5, 6], "lodr": [5, 20, 51], "commonli": [6, 7, 33, 35, 36, 38, 42, 46, 47, 49], "approach": 6, "incorpor": 6, "unlik": 6, "more": [6, 14, 21, 24, 25, 26, 33, 38, 39, 49, 51, 53, 55, 56, 59, 60], "effici": [6, 7, 41, 56, 57], "than": [6, 21, 22, 25, 33, 35, 36, 38, 41, 42, 43, 44, 49, 55, 56, 57], "sinc": [6, 13, 21, 24, 25, 26, 31, 39, 49, 55], "less": [6, 22, 38, 42, 49, 56, 57], "comput": [6, 15, 21, 22, 24, 25, 26, 33, 35, 36, 39, 41, 42, 44, 46, 47, 49, 55, 56, 57], "gpu": [6, 7, 8, 13, 14, 20, 21, 24, 25, 31, 33, 35, 36, 38, 39, 41, 43, 44, 46, 47, 49, 55, 56, 57], "try": [6, 10, 12, 15, 17, 19, 39, 41, 43, 44, 55, 56, 57], "might": [6, 7, 25, 26, 56, 57], "ideal": [6, 7], "mai": [6, 7, 9, 21, 24, 25, 26, 31, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57, 61], "With": [6, 21], "43": [6, 9, 25, 26, 38], "great": 6, "made": [6, 24], "boost": [6, 7], "tabl": [6, 19, 24, 25, 26], "67": [6, 21], "59": [6, 15, 21, 24, 36, 38], "86": 6, "fact": 6, "arpa": [6, 11, 49], "performn": 6, "depend": [6, 14, 15, 21, 33, 38], "kenlm": 6, "kpu": 6, "archiv": [6, 51], "zip": 6, "execut": [6, 7, 13, 24, 33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "9": [6, 9, 21, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 49, 55, 56, 57], "57": [6, 21, 25, 38, 42], "slightli": 6, "63": [6, 35], "04": [6, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47], "52": [6, 21, 33, 38], "73": 6, "mention": [6, 53], "earlier": 6, "benchmark": [6, 35], "speed": [6, 24, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "132": 6, "95": [6, 34], "177": [6, 21, 22, 25, 26, 35, 36, 38], "96": [6, 21], "210": [6, 46, 47], "262": [6, 7, 15], "62": [6, 7, 21, 38, 42], "65": [6, 7, 21, 24], "352": [6, 7, 38], "58": [6, 7, 10, 21, 38], "488": [6, 7, 24, 25, 26], "400": [6, 9, 34], "610": 6, "870": 6, "156": [6, 15], "203": [6, 15, 22, 38], "255": [6, 25, 26], "160": [6, 15], "263": [6, 9, 15, 21, 25], "singl": [6, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "32g": 
6, "v100": [6, 33, 35, 36, 38], "vari": 6, "word": [7, 11, 12, 15, 33, 35, 36, 38, 42, 46, 47, 49, 51], "error": [7, 9, 10, 12, 13, 15, 21, 24, 25, 26, 38], "rate": [7, 12, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "These": [7, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "alreadi": [7, 11, 13, 21, 22, 32], "But": [7, 24, 41, 43, 44, 55, 56, 57], "long": [7, 24, 51, 59, 60], "true": [7, 9, 15, 21, 22, 24, 25, 26, 31, 33, 35, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "either": [7, 15, 19, 33, 35, 36, 38, 56, 57], "choos": [7, 19, 21, 39, 41, 43, 44, 55, 56, 57], "three": [7, 15, 24, 25, 26, 29, 33, 35, 53], "associ": 7, "dimens": [7, 41, 51, 56, 57], "obviou": 7, "rel": [7, 32], "reduct": [7, 15, 21, 24, 25, 43], "around": [7, 31], "A": [7, 14, 22, 24, 25, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 55, 56, 57], "few": [7, 11, 24, 25, 26, 39], "paramet": [7, 14, 22, 24, 25, 26, 28, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 51, 55, 56, 57, 59, 60], "tune": [7, 20, 24, 25, 26, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57, 61], "control": [7, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "too": 7, "small": [7, 31, 32, 35, 46, 47, 49], "fulli": 7, "util": [7, 9, 10, 15, 21, 38], "larg": [7, 13], "domin": 7, "bad": 7, "typic": [7, 33, 35, 36, 38], "activ": [7, 13, 19, 21], "path": [7, 9, 15, 19, 21, 22, 24, 25, 26, 29, 31, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "trade": 7, "off": [7, 24], "accuraci": [7, 24, 25, 32, 34], "larger": [7, 25, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "slower": 7, "built": [8, 9, 21], "imag": [8, 20], "cpu": [8, 12, 13, 14, 15, 16, 20, 21, 22, 24, 25, 26, 28, 33, 41, 43, 44, 49, 56, 57, 59], "still": [8, 24, 25, 26, 32], "introduct": [8, 20, 52, 61], "tag": [8, 20], "cuda": [8, 10, 15, 20, 22, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 55, 56, 57], "enabl": [8, 21, 39], "within": [8, 14, 17, 19, 20, 24, 25], "updat": [8, 24, 25, 26], "host": [9, 22], "hub": 9, "k2fsa": 9, 
"find": [9, 10, 16, 17, 18, 19, 22, 24, 25, 26, 29, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "dockerfil": 9, "tree": [9, 11, 28, 29, 33, 35, 36, 38, 42, 46, 47, 49, 55], "item": [9, 14], "curl": 9, "registri": 9, "v2": [9, 26, 33, 38], "jq": 9, "give": [9, 11, 15, 35], "someth": [9, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "torch2": [9, 13, 15], "cuda12": 9, "cuda11": [9, 10, 21], "torch1": [9, 10, 21], "cuda10": 9, "13": [9, 10, 15, 21, 22, 24, 25, 26, 31, 35, 36, 38, 42, 43, 46], "releas": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38], "torch": [9, 10, 13, 14, 20, 22, 23, 30, 33, 35, 38], "select": [9, 12, 13, 14, 19, 21, 24, 25, 26, 41, 42, 46, 47, 49, 55, 56, 57], "appropri": [9, 21], "combin": [9, 12, 24, 25, 26], "visit": [9, 18, 19, 41, 43, 44, 55, 56, 57, 59, 60], "pkg": 9, "py3": [9, 10, 21], "v1": [9, 33, 36, 38, 42, 46, 47], "current": [9, 19, 24, 25, 35, 39, 53, 55, 56, 57, 59, 60, 61], "ghcr": 9, "alwai": [9, 21, 22], "sudo": [9, 33, 36], "rm": 9, "bin": [9, 13, 21, 24, 25, 26, 33, 38], "bash": 9, "start": [9, 11, 12, 14, 15, 16, 19, 21, 22, 26, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "interfac": 9, "present": [9, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "root": [9, 24, 25, 26, 51], "60c947eac59c": 9, "workspac": 9, "export": [9, 10, 11, 12, 13, 14, 16, 20, 21, 33, 35, 36, 38, 39, 42, 46, 47, 49, 58], "pythonpath": [9, 11, 12, 13, 15, 16, 21, 24, 25, 26], "user": [9, 10], "copi": [9, 21, 53], "switch": [9, 21, 33, 38, 44], "opt": 9, "conda": [9, 10], "lib": [9, 10, 15, 21, 26], "site": [9, 10, 15, 21, 26], "packag": [9, 10, 15, 21, 26, 59, 60], "__init__": [9, 10, 15, 21, 22, 24, 25, 26, 33, 35, 38], "line": [9, 10, 11, 24, 25, 26, 41, 51, 56, 57], "modul": [9, 13, 20, 24, 26, 43, 56], "_k2": [9, 10, 21], "determinizeweightpushingtyp": [9, 10], "importerror": [9, 20], "libcuda": 9, "cannot": [9, 20, 24, 25, 26], "share": [9, 20, 21], "object": [9, 20, 21, 33, 35, 36, 41, 49, 55, 56], "No": [9, 13, 20, 24, 25, 
26, 49], "stub": 9, "list": [9, 15, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47], "16": [9, 15, 21, 22, 24, 25, 26, 29, 33, 35, 36, 38, 41, 42, 46, 47, 49, 55, 56, 57], "second": [9, 14, 33, 35, 36, 38, 39, 41, 43, 44, 49, 55, 56, 57], "2023": [9, 15, 21, 24, 25, 26, 31, 43, 51, 60], "01": [9, 11, 15, 21, 24, 35, 36, 38, 39, 43], "02": [9, 11, 21, 22, 24, 25, 26, 35, 38, 41, 47, 55, 56, 59], "06": [9, 15, 21, 22, 24, 31, 36, 38, 42, 49], "info": [9, 15, 21, 22, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47, 49], "264": [9, 21, 26], "posixpath": [9, 15, 21, 24, 25, 26, 35, 38], "lang_dir": [9, 15, 21, 35, 38], "lang_phon": [9, 11, 15, 21, 36, 42, 46, 47, 49], "feature_dim": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38, 49], "search_beam": [9, 15, 21, 33, 38, 49], "20": [9, 14, 15, 21, 22, 24, 26, 31, 33, 35, 36, 38, 41, 42, 46, 47, 49, 51, 56], "output_beam": [9, 15, 21, 33, 38, 49], "min_active_st": [9, 15, 21, 33, 38, 49], "30": [9, 10, 15, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 43, 44, 49, 55, 56, 57], "max_active_st": [9, 15, 21, 33, 38, 49], "10000": [9, 15, 21, 33, 38, 49], "use_double_scor": [9, 15, 21, 33, 38, 49], "14": [9, 10, 15, 21, 22, 24, 25, 28, 33, 38, 41, 42, 43, 46, 55, 56, 57], "feature_dir": [9, 15, 21, 38], "fbank": [9, 11, 15, 21, 22, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47, 49], "max_dur": [9, 15, 21, 38], "bucketing_sampl": [9, 15, 21, 38], "num_bucket": [9, 15, 21, 38], "concatenate_cut": [9, 15, 21, 38], "duration_factor": [9, 15, 21, 38], "gap": [9, 15, 21, 38], "on_the_fly_feat": [9, 15, 21, 38], "shuffl": [9, 15, 21, 38], "return_cut": [9, 15, 21, 38], "num_work": [9, 15, 21, 38], "env_info": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38], "sha1": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38], "4c05309499a08454997adf500b56dcc629e35ae5": [9, 21], "date": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38], "tue": [9, 21, 24, 38], "jul": [9, 15, 21], "25": [9, 15, 21, 22, 24, 25, 33, 38, 41, 46, 47, 49, 56], "36": [9, 21, 24, 35, 38, 39], "dev": [9, 10, 15, 21, 22, 24, 25, 26, 31, 
33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "7640d663": 9, "branch": [9, 15, 21, 22, 24, 25, 26, 33, 35, 38, 43], "375520d": 9, "fri": [9, 22], "28": [9, 21, 24, 25, 35, 38, 42, 59], "07": [9, 21, 24, 25, 26, 33, 35, 36, 38], "hostnam": [9, 15, 21, 22, 24, 25, 26, 35], "ip": [9, 15, 21, 22, 24, 25, 26, 35], "172": 9, "17": [9, 21, 22, 24, 25, 26, 33, 38, 46, 47, 55], "401": 9, "lexicon": [9, 11, 15, 21, 33, 35, 36, 38, 39, 41, 43, 44, 49, 55, 56, 57], "168": [9, 15, 21, 42], "compil": [9, 15, 21, 24, 25, 33, 35, 38], "linv": [9, 11, 15, 21, 35, 38, 49], "403": [9, 42], "273": [9, 15, 21, 22, 35], "devic": [9, 15, 21, 22, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 56, 57], "406": [9, 38], "291": [9, 21], "424": 9, "218": [9, 15, 21, 25], "about": [9, 11, 12, 14, 15, 16, 21, 24, 25, 26, 35, 39, 41, 44, 55, 56, 57], "cut": [9, 15, 21, 38], "425": [9, 25, 38], "252": [9, 21], "504": 9, "204": [9, 21, 26, 38], "batch": [9, 15, 21, 24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 51, 55, 56, 57], "process": [9, 15, 21, 22, 24, 25, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "until": [9, 15, 21, 38, 43], "w": [9, 21, 38, 46, 47], "nnpack": 9, "cpp": [9, 24, 28], "53": [9, 15, 21, 26, 33, 41, 42, 47, 55, 56], "could": [9, 24, 25, 26, 31, 32, 33, 36, 51], "reason": [9, 14, 22, 24, 25, 26, 31, 56], "unsupport": 9, "hardwar": 9, "687": 9, "241": [9, 21, 33], "transcript": [9, 15, 21, 33, 34, 35, 36, 38, 41, 42, 46, 47, 55, 56, 57], "store": [9, 11, 15, 21, 38, 51], "recog": [9, 15, 21, 35, 38], "test_set": [9, 15, 21, 49], "688": 9, "564": [9, 15, 21], "240": [9, 15, 21, 33, 49], "ins": [9, 15, 21, 38, 49], "del": [9, 15, 21, 38, 49], "sub": [9, 15, 21, 38, 49], "690": 9, "249": [9, 21, 25], "wrote": [9, 15, 21, 38], "detail": [9, 11, 15, 21, 23, 27, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 53, 55, 56, 57, 59, 60], "stat": [9, 15, 21, 38], "err": [9, 15, 21, 35, 38], "316": [9, 21, 38], "congratul": [9, 13, 21, 24, 25, 26, 33, 36, 38, 42, 46, 47, 49], 
"finish": [9, 14, 33, 35, 36, 38, 39, 41, 42, 46, 47, 49, 56, 57], "successfulli": [9, 13, 21, 24, 25, 26], "collect": [10, 13, 21, 51], "post": 10, "correspond": [10, 18, 19], "solut": 10, "One": 10, "torchaudio": [10, 13, 20, 53], "cu111": 10, "torchvis": 10, "f": [10, 13, 15, 21, 46, 47], "org": [10, 13, 21, 34, 35, 41, 51, 55, 56, 57], "whl": [10, 13, 21], "torch_stabl": [10, 13, 21], "throw": [10, 24, 25, 26], "while": [10, 16, 21, 24, 25, 26, 33, 35, 36, 38, 39, 41, 43, 44, 51, 55, 56, 57], "That": [10, 11, 14, 15, 16, 24, 25, 39, 41, 55, 56, 57], "cu11": 10, "correct": 10, "traceback": 10, "most": [10, 56, 57], "recent": [10, 24, 25, 26], "last": 10, "yesnoasrdatamodul": 10, "home": [10, 24, 25, 33, 38], "xxx": [10, 22, 24, 25, 26], "next": [10, 13, 14, 19, 21, 24, 25, 26, 38, 39, 41, 42, 43, 44, 51, 55, 56, 57], "gen": [10, 13, 14, 19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "kaldi": [10, 11, 13, 14, 19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "34": [10, 24, 25], "datamodul": 10, "add_eo": 10, "add_so": 10, "get_text": 10, "39": [10, 21, 24, 26, 35, 38, 42, 46], "tensorboard": [10, 16, 21, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "summarywrit": 10, "miniconda3": 10, "env": 10, "yyi": 10, "loosevers": 10, "uninstal": 10, "setuptool": [10, 13, 21], "yangyifan": 10, "anaconda3": 10, "dev20230112": 10, "linux": [10, 13, 14, 19, 21, 23, 24, 25, 26, 27], "x86_64": [10, 21, 24], "egg": 10, "handl": [10, 33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "except": [10, 22], "anoth": 10, "occur": 10, "pruned_transducer_stateless7_ctc_b": [10, 43], "104": [10, 15, 21], "rais": 10, "anaconda": 10, "maco": [10, 13, 14, 19, 23, 24, 25, 26, 27], "probabl": [10, 35, 41, 43, 55, 56, 57], "variabl": [10, 12, 13, 16, 21, 24, 25, 26, 33, 36, 38, 39, 41, 43, 44, 55, 56, 57], "dyld_library_path": 10, "conda_prefix": 10, "locat": [10, 16, 24], "libpython": 10, "abl": 10, "insid": [10, 29], "codna_prefix": 10, "ld_library_path": 10, "setup": [11, 
14, 20, 21, 24, 31, 33, 35, 36, 38, 39, 41, 42, 46, 47, 49, 56, 57, 59, 60], "everyth": [11, 23], "tmp": [11, 12, 13, 15, 16, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "each": [11, 15, 22, 24, 25, 27, 33, 35, 36, 38, 41, 43, 44, 51, 53, 55, 56, 57], "exist": 11, "anyth": [11, 17, 19], "els": 11, "wonder": [11, 15], "url": [11, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "varieti": 11, "folder": [11, 21, 22, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "wav": [11, 15, 22, 24, 25, 26, 27, 29, 33, 35, 36, 38, 41, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "scp": 11, "feat": 11, "put": [11, 13, 21, 24, 25, 43, 56], "l": [11, 21, 24, 25, 26, 35, 46, 47, 49], "waves_yesno": [11, 15, 21], "tar": [11, 21], "gz": [11, 21, 51], "l41": 11, "extract": [11, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "yesno_cuts_test": 11, "jsonl": [11, 22], "yesno_cuts_train": 11, "yesno_feats_test": 11, "lca": 11, "yesno_feats_train": 11, "hlg": [11, 15, 21, 42, 46, 47, 49], "l_disambig": [11, 49], "lexicon_disambig": [11, 49], "manifest": [11, 21, 31, 39], "yesno_recordings_test": 11, "yesno_recordings_train": 11, "yesno_supervisions_test": 11, "yesno_supervisions_train": 11, "18": [11, 21, 24, 25, 26, 33, 35, 36, 38, 41, 42, 46, 47, 55, 56, 57], "thei": [11, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "idea": [11, 15, 53], "examin": 11, "relat": [11, 22, 31, 33, 35, 38, 42, 46, 47, 49, 59, 60], "gunzip": 11, "c": [11, 21, 35, 36, 41, 43, 44, 49, 55, 56, 57], "head": [11, 21, 35, 53], "output": [11, 12, 13, 15, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "id": [11, 33, 36, 38, 42, 46, 47], "0_0_0_0_1_1_1_1": 11, "channel": [11, 19, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "sampling_r": 11, "num_sampl": 11, "50800": 11, "35": [11, 21, 22, 24, 25, 26, 35, 38, 55], "channel_id": 11, "0_0_0_1_0_1_1_0": 11, "48880": 11, "0_0_1_0_0_1_1_0": 11, "48160": 11, "audio": [11, 21, 46, 47], 
"l300": 11, "mean": [11, 14, 15, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "field": [11, 34], "per": [11, 35, 41, 56, 57], "recording_id": 11, "NO": [11, 15, 49], "ye": [11, 15, 49], "hebrew": [11, 49], "supervis": [11, 20, 32, 61], "l510": 11, "furthermor": [11, 35], "featur": [11, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "compress": [11, 21], "lilcom": [11, 21], "cutset": [11, 31], "recordingset": 11, "supervisionset": 11, "featureset": 11, "num_fram": 11, "635": 11, "num_featur": 11, "frame_shift": 11, "storage_typ": 11, "lilcom_chunki": 11, "storage_path": 11, "storage_kei": 11, "13000": 11, "3570": 11, "record": [11, 19, 25, 26, 33, 34, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "monocut": 11, "611": 11, "16570": 11, "12964": 11, "2929": 11, "602": 11, "32463": 11, "12936": 11, "2696": 11, "actual": [11, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "separ": [11, 27, 51], "lang": [11, 21, 22, 35, 38, 44], "quit": [12, 14, 16, 32, 33, 35, 36, 38, 41, 43, 44, 51, 55, 56, 57], "cuda_visible_devic": [12, 16, 21, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "usag": [12, 15, 16, 22, 24, 25, 26, 28, 29, 42, 46, 47, 49], "one": [12, 19, 22, 24, 25, 26, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59], "tini": [13, 14], "well": [13, 22, 49, 61], "hundr": 13, "thousand": 13, "virtualenv": [13, 21], "icefall_env": [13, 15], "interpret": 13, "usr": 13, "prefix": [13, 22], "pkg_resourc": 13, "wheel": [13, 21, 24], "remeb": 13, "continu": [13, 15, 24, 25, 26, 27, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "caution": [13, 33, 38], "matter": [13, 21, 24], "torchaduio": 13, "from_wheel": [13, 15, 21], "dev20231220": 13, "anytim": 13, "modulenotfounderror": 13, "don": [13, 14, 15, 21, 24, 25, 26, 28, 31, 33, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "walk": 14, "recognit": [14, 19, 20, 23, 24, 25, 32, 34, 35, 49, 61], "system": [14, 51], "out": [14, 39, 51], 
"minut": [14, 51], "sequenti": 14, "part": [14, 15, 19, 21, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57, 59, 60], "window": [14, 19, 23, 24, 25, 26, 27], "commun": 14, "appreci": 14, "virtual": 14, "curiou": 14, "quick": 14, "state_dict": [14, 20, 30, 33, 35, 36, 38, 42, 46, 47, 49], "jit": [14, 20, 23, 30, 38], "onnx": [14, 20, 22, 30, 59, 60], "torchscript": [15, 23, 28, 29, 30], "trace": [15, 20, 23, 28, 30], "explain": 15, "kind": [15, 38, 41, 43, 44, 55, 56, 57], "produc": [15, 23, 41, 43, 44, 55, 56, 57], "03": [15, 21, 22, 25, 35, 38, 46, 47, 55, 59], "912": [15, 22], "76": [15, 21, 49], "lr": [15, 21, 31, 35, 55], "weight_decai": [15, 21], "1e": [15, 21], "start_epoch": [15, 21], "best_train_loss": [15, 21, 22, 24, 25, 26], "inf": [15, 21, 22, 24, 25, 26], "best_valid_loss": [15, 21, 22, 24, 25, 26], "best_train_epoch": [15, 21, 22, 24, 25, 26], "best_valid_epoch": [15, 21, 22, 25, 26], "batch_idx_train": [15, 21, 22, 24, 25, 26], "log_interv": [15, 21, 22, 24, 25, 26], "reset_interv": [15, 21, 22, 24, 25, 26], "valid_interv": [15, 21, 22, 24, 25, 26], "beam_siz": [15, 21, 22, 35], "sum": [15, 21], "913": 15, "950": 15, "971": [15, 47], "106": [15, 21, 25, 38], "Not": 15, "974": 15, "111": [15, 21, 38], "kei": [15, 24, 25, 26, 38], "bia": 15, "running_mean": 15, "running_var": 15, "num_batches_track": 15, "output_linear": 15, "48": [15, 21, 24, 25, 33, 35], "089": 15, "090": 15, "ad79f1c699c684de9785ed6ca5edb805a41f78c3": 15, "wed": [15, 21, 24, 33, 35, 38], "26": [15, 21, 24, 25, 26, 35, 38, 47], "09": [15, 22, 25, 33, 35, 36, 38, 55], "aa073f6": 15, "none": [15, 21, 33, 38], "9a47c08": 15, "mon": [15, 25, 26], "aug": [15, 39], "50": [15, 21, 22, 24, 25, 26, 38, 41, 46, 55, 56, 57], "privat": 15, "fangjun": [15, 21, 22, 24, 25, 26, 35, 38], "macbook": 15, "pro": [15, 33, 38], "127": [15, 21, 24, 25, 49], "092": 15, "103": 15, "272": 15, "109": [15, 21, 33, 38], "112": [15, 24, 25, 26], "115": [15, 24, 25, 33, 38], "253": 15, "386": 15, 
"556": 15, "557": 15, "558": 15, "248": [15, 35], "559": 15, "315": [15, 24, 33, 35, 36, 38, 42], "ident": 15, "kaldifeat": 15, "csukuangfj": [15, 21, 22, 24, 25, 27, 33, 35, 36, 38, 42, 46, 47, 49, 55, 59], "dev20231221": 15, "0_0_0_1_0_0_0_1": [15, 49], "0_0_1_0_0_0_1_0": [15, 49], "19": [15, 22, 24, 25, 26, 31, 33, 38, 42, 46, 47], "208": [15, 38], "136": [15, 38], "num_class": [15, 33, 38, 49], "sample_r": [15, 22, 33, 35, 38, 49], "words_fil": [15, 33, 38, 49], "sound_fil": [15, 22, 33, 35, 38, 49], "142": [15, 24, 33, 36, 38], "144": [15, 38], "212": 15, "213": [15, 49], "construct": [15, 22, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47, 49], "170": [15, 42], "sound": [15, 22, 24, 25, 26, 29, 30, 33, 35, 36, 38, 42, 46, 47, 49], "224": 15, "176": [15, 24, 35, 38], "304": [15, 25], "214": [15, 35, 38], "47": [15, 21, 24, 25, 26, 31, 33, 38], "44": [15, 21, 24, 25, 38, 46, 47], "666": 15, "667": 15, "670": 15, "677": [15, 24], "100": [15, 21, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "843": 15, "cpu_jit": [15, 28, 33, 38, 41, 43, 44, 56, 57], "confus": [15, 28], "move": [15, 28, 41, 43, 44, 56, 57], "map_loc": 15, "resid": 15, "default": [15, 24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "jit_pretrain": [15, 29, 43, 44, 55], "nn": [15, 35, 41, 43, 44, 55, 56, 57], "56": [15, 21, 24, 25, 38, 46], "00": [15, 21, 24, 33, 35, 36, 38, 42, 46, 47, 49], "603": 15, "121": [15, 42], "nn_model": [15, 33, 38], "129": [15, 36], "640": [15, 21, 26], "134": [15, 33], "641": 15, "138": [15, 33, 35], "148": 15, "642": 15, "154": [15, 36], "727": 15, "190": [15, 42], "192": [15, 26, 38], "export_onnx": 15, "onnxruntim": [15, 27], "888": [15, 33], "83": [15, 38, 42], "892": 15, "diagnost": 15, "verbos": 15, "warn": 15, "21": [15, 21, 22, 24, 33, 35, 38, 46, 47], "047": [15, 35], "meta_data": 15, "model_typ": 15, "model_author": 15, "comment": 15, "non": [15, 20, 38, 53, 56, 61], "vocab_s": [15, 22, 24, 25, 26, 35], "049": 15, "140": [15, 21, 36], "int8": 
[15, 23, 30, 60], "quantiz": [15, 23, 30, 39], "075": 15, "onnx_quant": 15, "538": [15, 38], "tensor": [15, 21, 25, 26, 33, 35, 36, 38, 41, 49, 55, 56], "transpose_1_output_0": 15, "081": 15, "151": [15, 24], "float32": [15, 24, 25, 26], "onnx_pretrain": [15, 27], "260": [15, 26, 38], "166": 15, "171": [15, 21, 36, 38, 46, 47], "173": 15, "267": [15, 25, 35, 46, 47], "270": 15, "180": [15, 25, 33, 38], "279": [15, 38], "196": 15, "318": [15, 24, 25], "232": 15, "234": [15, 38], "deploi": [15, 27, 33, 38], "sherpa": [15, 19, 23, 28, 29, 30, 55], "framework": [15, 19, 41, 56], "_": [15, 39], "ncnn": [15, 20, 30], "youtub": [17, 20, 38, 39, 41, 42, 43, 44, 55, 56, 57], "video": [17, 20, 38, 39, 41, 42, 43, 44, 55, 56, 57], "upload": [18, 19, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "specif": [18, 27, 35], "aishel": [18, 20, 33, 35, 36, 37, 61], "wenetspeech": [18, 28], "ipad": 19, "phone": 19, "screenshot": [19, 33, 35, 36, 38, 39, 41, 49, 55, 56], "chines": [19, 34, 35], "english": [19, 31, 49, 55], "greedi": 19, "click": [19, 21, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "button": 19, "submit": 19, "wait": 19, "moment": 19, "bottom": [19, 41, 43, 44, 55, 56, 57], "subscrib": [19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "nadira": [19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "povei": [19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "www": [19, 21, 34, 38, 39, 41, 42, 43, 44, 51, 55, 56, 57], "uc_vaumpkminz1pnkfxan9mw": [19, 21, 38, 39, 41, 42, 43, 44, 55, 56, 57], "dummi": [20, 38], "toolkit": 20, "cudnn": 20, "docker": [20, 21], "frequent": 20, "ask": 20, "question": 20, "faq": 20, "oserror": 20, "libtorch_hip": 20, "attributeerror": 20, "distutil": 20, "attribut": [20, 26, 38], "libpython3": 20, "timit": [20, 37, 46, 47, 61], "tt": [20, 59, 60, 61], "vit": [20, 58, 61], "ljspeech": [20, 58, 61], "vctk": [20, 58, 61], "fine": [20, 39, 61], "finetun": [20, 32, 61], "zipform": [20, 23, 27, 30, 32, 37, 40, 51, 52, 54, 61], "contribut": 20, "support": 
[21, 23, 24, 25, 26, 33, 35, 38, 41, 43, 44, 53, 55, 56, 57, 59, 60], "guid": 21, "suggest": [21, 31, 41, 43, 44, 55, 56, 57], "strongli": 21, "point": [21, 22, 33, 36, 38, 39, 41, 43, 44, 55, 56, 57], "sever": [21, 22, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 53, 55, 56, 57], "just": [21, 24, 25, 26, 51, 53], "kuangfangjun": [21, 24, 25, 26], "cpython3": 21, "final": [21, 22, 24, 25, 38, 42], "64": [21, 22, 24, 35, 56], "9422m": 21, "creator": 21, "cpython3posix": 21, "dest": 21, "star": [21, 24, 25, 26], "fj": [21, 22, 24, 25, 26, 35, 38], "clear": 21, "no_vcs_ignor": 21, "global": 21, "seeder": 21, "fromappdata": 21, "bundl": 21, "app_data_dir": 21, "ad": [21, 24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 49, 53, 55, 56, 57], "seed": 21, "bashactiv": 21, "cshellactiv": 21, "fishactiv": 21, "nushellactiv": 21, "powershellactiv": 21, "pythonactiv": 21, "determin": 21, "nvidia": [21, 33, 35, 36, 38], "smi": 21, "49": [21, 24, 25, 38, 47, 49], "510": 21, "driver": 21, "greater": 21, "our": [21, 24, 25, 26, 28, 29, 38, 39, 41, 53, 56, 57], "case": [21, 22, 24, 25, 26, 32, 41, 43, 44, 55, 56, 57], "verifi": 21, "nvcc": 21, "copyright": 21, "2005": 21, "2019": 21, "corpor": 21, "wed_oct_23_19": 21, "38_pdt_2019": 21, "v10": 21, "89": [21, 33], "cu116": 21, "compat": 21, "stabl": 21, "matrix": 21, "2bcu116": 21, "cp38": 21, "linux_x86_64": 21, "1983": 21, "mb": [21, 24, 25, 26], "________________________________________": 21, "gb": [21, 35], "764": 21, "kb": [21, 24, 25, 26, 46, 47], "eta": 21, "satisfi": 21, "extens": 21, "__version__": 21, "dev20230725": 21, "pypi": 21, "tuna": 21, "tsinghua": 21, "edu": 21, "cn": 21, "resolv": 21, "main": [21, 33, 38, 53], "ubuntu": [21, 24, 25, 26], "2bcuda11": 21, "manylinux_2_17_x86_64": 21, "manylinux2014_x86_64": 21, "graphviz": 21, "cach": [21, 26], "de": [21, 22, 24, 25, 26, 35], "5e": 21, "fcbb22c68208d39edff467809d06c9d81d7d27426460ebc598e55130c1aa": 21, "o": 21, "cento": 21, "2009": 21, "core": 21, "cmake": [21, 24, 
25, 33, 38], "27": [21, 24, 25, 26, 31, 33, 35, 42, 47], "gcc": 21, "cmake_cuda_flag": 21, "wno": 21, "deprec": [21, 35], "lineinfo": 21, "expt": 21, "extend": 21, "lambda": 21, "use_fast_math": 21, "xptxa": 21, "gencod": 21, "arch": 21, "compute_35": 21, "sm_35": 21, "compute_50": 21, "sm_50": 21, "compute_60": 21, "sm_60": 21, "compute_61": 21, "sm_61": 21, "compute_70": 21, "sm_70": 21, "compute_75": 21, "sm_75": 21, "compute_80": 21, "sm_80": 21, "compute_86": 21, "sm_86": 21, "donnx_namespac": 21, "onnx_c2": 21, "compute_52": 21, "sm_52": 21, "xcudaf": 21, "diag_suppress": 21, "cc_clobber_ignor": 21, "integer_sign_chang": 21, "useless_using_declar": 21, "set_but_not_us": 21, "field_without_dll_interfac": 21, "base_class_has_different_dll_interfac": 21, "dll_interface_conflict_none_assum": 21, "dll_interface_conflict_dllexport_assum": 21, "implicit_return_from_non_void_funct": 21, "unsigned_compare_with_zero": 21, "declared_but_not_referenc": 21, "bad_friend_decl": 21, "relax": 21, "constexpr": 21, "d_glibcxx_use_cxx11_abi": 21, "option": [21, 23, 27, 30, 35, 39, 42, 46, 47, 49], "wall": 21, "strict": [21, 26, 34], "overflow": 21, "unknown": 21, "pragma": 21, "cmake_cxx_flag": 21, "unus": 21, "nvtx": 21, "disabl": [21, 22, 24, 25], "debug": 21, "sync": 21, "kernel": [21, 24, 26, 35], "memori": [21, 24, 33, 35, 38, 53], "alloc": 21, "214748364800": 21, "byte": [21, 24, 25, 26], "200": [21, 22, 24, 25, 26, 33, 38, 39, 46, 47, 49], "abort": 21, "__file__": 21, "cpython": [21, 24], "gnu": [21, 24], "req": 21, "vq12fd5i": 21, "filter": 21, "quiet": [21, 34], "7640d663469b22cd0b36f3246ee9b849cd25e3b7": 21, "metadata": [21, 46, 47], "pyproject": 21, "toml": 21, "cytoolz": 21, "3b": 21, "a7828d575aa17fb7acaf1ced49a3655aa36dad7e16eb7e6a2e4df0dda76f": 21, "33": [21, 24, 25, 33, 34, 35, 38, 46], "pyyaml": 21, "c8": 21, "6b": 21, "6600ac24725c7388255b2f5add93f91e58a5d7efaf4af244fdbcc11a541b": 21, "ma": 21, "nylinux_2_17_x86_64": 21, "736": 21, "dataclass": 21, "2f": 21, 
"1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d": 21, "dev0": 21, "7640d66": 21, "a8": 21, "df0a69c52bd085ca1ad4e5c4c1a5c680e25f9477d8e49316c4ff1e5084a4": 21, "linux_2_17_x86_64": 21, "87": [21, 24], "tqdm": 21, "e6": 21, "a2cff6306177ae6bc73bc0665065de51dfb3b9db7373e122e2735faf0d97": 21, "numpi": 21, "audioread": 21, "5d": 21, "cb": 21, "82a002441902dccbe427406785db07af10182245ee639ea9f4d92907c923": 21, "377": 21, "tabul": 21, "40": [21, 24, 25, 26, 36, 38, 42, 46, 47], "4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854": 21, "1a": 21, "70": 21, "e63223f8116931d365993d4a6b7ef653a4d920b41d03de7c59499962821f": 21, "97": [21, 24, 33], "ab": [21, 41, 55, 56, 57], "c3": 21, "57f0601a2d4fe15de7a553c00adbc901425661bf048f2a22dfc500caf121": 21, "intervaltre": 21, "fb": 21, "396d568039d21344639db96d940d40eb62befe704ef849b27949ded5c3bb": 21, "soundfil": 21, "bd": 21, "0602167a213d9184fc688b1086dc6d374b7ae8c33eccf169f9b50ce6568c": 21, "py2": 21, "46": [21, 25, 33, 38], "toolz": 21, "7f": 21, "5c": 21, "922a3508f5bda2892be3df86c74f9cf1e01217c2b1f8a0ac4841d903e3e9": 21, "55": [21, 24, 36, 38, 46], "sortedcontain": 21, "9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621c": 21, "cffi": 21, "b7": 21, "8b": 21, "06f30caa03b5b3ac006de4f93478dbd0239e2a16566d81a106c322dc4f79": 21, "15": [21, 22, 24, 25, 26, 31, 35, 36, 38, 46, 49, 51], "442": 21, "pycpars": 21, "d5": 21, "5f610ebe421e85889f2e55e33b7f9a6795bd982198517d912eb1c76e1a53": 21, "118": [21, 38], "filenam": [21, 24, 25, 26, 27, 28, 29, 43, 44, 55, 57, 59, 60], "size": [21, 22, 24, 25, 26, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "687627": 21, "sha256": 21, "cbf0a4d2d0b639b33b91637a4175bc251d6a021a069644ecb1a9f2b3a83d072a": 21, "ephem": 21, "wwtk90_m": 21, "7a": 21, "8e": 21, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 21, "23704": 21, "5e2d3537c96ce9cf0f645a654c671163707bf8cb8d9e358d0e2b0939a85ff4c2": 21, "9c": 21, 
"f19ae5a03f8862d9f0776b0c0570f1fdd60a119d90954e3f39": 21, "26098": 21, "2604170976cfffe0d2f678cb1a6e5b525f561cd50babe53d631a186734fec9f9": 21, "f3": 21, "ed": 21, "2b": 21, "c179ebfad4e15452d6baef59737f27beb9bfb442e0620f7271": 21, "remot": 21, "enumer": 21, "12942": 21, "count": 21, "total": [21, 25, 26, 33, 35, 36, 38, 39, 41, 42, 49, 55, 56], "delta": 21, "reus": 21, "pack": [21, 51, 56, 57], "12875": 21, "receiv": 21, "mib": 21, "8835": 21, "41": [21, 24, 26, 33, 35, 46, 49], "dl_dir": [21, 33, 36, 38, 39, 41, 43, 44, 55, 56, 57], "___________________________________________________": 21, "70m": 21, "1mb": 21, "718": 21, "compute_fbank_yesno": 21, "_______________________________________________________________________________": 21, "90": [21, 24], "82it": 21, "778": 21, "______________________________________________________________________________": 21, "256": [21, 26, 46, 47], "92it": 21, "51": [21, 24, 33, 38, 49], "66": [21, 25, 31], "project": 21, "kaldilm": 21, "csrc": [21, 38], "arpa_file_pars": 21, "cc": 21, "void": 21, "arpafilepars": 21, "std": 21, "istream": 21, "79": 21, "92": [21, 38], "275": [21, 33], "compile_hlg": 21, "124": [21, 33, 38], "276": 21, "convert": [21, 24, 25, 26, 38], "309": 21, "ctc_topo": 21, "max_token_id": 21, "310": 21, "314": 21, "intersect": [21, 41, 56, 57], "323": 21, "lg": [21, 41, 44, 56, 57], "shape": [21, 26], "connect": [21, 22, 38, 41, 42, 55, 56, 57], "68": [21, 38], "class": [21, 38], "71": [21, 38, 42], "341": 21, "rag": 21, "raggedtensor": 21, "remov": [21, 33, 35, 36, 38, 42, 46, 47], "disambigu": 21, "354": 21, "91": 21, "remove_epsilon": 21, "445": 21, "arc": 21, "compos": 21, "h": 21, "446": 21, "447": 21, "segment": 21, "fault": 21, "dump": 21, "protocol_buffers_python_implement": 21, "674": 21, "interest": [21, 39, 41, 43, 44, 55, 56, 57], "936": 21, "481": 21, "482": 21, "world_siz": [21, 39], "master_port": 21, "12354": 21, "num_epoch": 21, "3fb0a43": 21, "thu": [21, 22, 24, 25, 26, 35, 38, 42], "05": 
[21, 22, 24, 25, 31, 33, 35, 36, 38, 47, 51, 60], "74279": [21, 22, 24, 25, 26, 35], "1220091118": 21, "57c4d55446": 21, "sph26": 21, "941": 21, "949": 21, "495": 21, "965": [21, 33], "146": 21, "244": 21, "967": 21, "149": [21, 24, 38], "199": [21, 38, 42], "singlecutsampl": 21, "205": [21, 38], "968": 21, "565": [21, 38], "422": 21, "loss": [21, 24, 25, 33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "065": 21, "over": [21, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "2436": 21, "frame": [21, 35, 41, 43, 56, 57], "tot_loss": 21, "681": [21, 24], "4561": 21, "2828": 21, "7076": 21, "22192": 21, "54": [21, 25, 26, 38, 42, 46, 47], "167": 21, "444": 21, "9002": 21, "18067": 21, "011": 21, "2555": 21, "2695": 21, "484": 21, "34971": 21, "331": [21, 24, 25, 38, 42], "4688": 21, "368": 21, "75": [21, 24], "633": 21, "2532": 21, "242": [21, 33, 38], "1139": 21, "1592": 21, "522": [21, 38], "1627": 21, "209": [21, 42], "07055": 21, "1175": 21, "07091": 21, "847": 21, "07731": 21, "427": [21, 25, 38], "04391": 21, "05341": 21, "884": 21, "04384": 21, "387": [21, 47], "03458": 21, "04616": 21, "707": [21, 33, 38], "03379": 21, "758": [21, 38], "433": [21, 38], "01054": 21, "980": [21, 38], "009014": 21, "009974": 21, "489": [21, 33], "01085": 21, "258": [21, 46, 47], "01172": 21, "01055": 21, "621": [21, 49], "01074": 21, "699": 21, "866": 21, "01044": 21, "844": 21, "008942": 21, "221": [21, 38], "01082": 21, "970": [21, 38], "01169": 21, "247": 21, "01073": 21, "326": [21, 25], "555": 21, "840": 21, "841": 21, "855": 21, "868": 21, "882": 21, "883": 21, "157": 21, "701": 21, "702": [21, 38], "704": [21, 33, 46], "fun": [21, 24, 25], "variou": [21, 27, 30, 61], "period": [22, 24], "disk": 22, "optim": [22, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "resum": [22, 31, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "strip": 22, "reduc": [22, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "pruned_transducer_stateless3": [22, 28, 53], 
"almost": [22, 41, 53, 56, 57], "dict": [22, 26], "stateless3": [22, 24], "repo": [22, 27], "those": 22, "wave": [22, 24, 25, 26, 33, 38], "iter": [22, 24, 25, 26, 29, 41, 43, 44, 55, 56, 57], "1224000": 22, "greedy_search": [22, 31, 35, 41, 43, 55, 56, 57], "test_wav": [22, 24, 25, 26, 27, 33, 35, 36, 38, 42, 46, 47, 49], "1089": [22, 24, 25, 26, 27, 38, 42], "134686": [22, 24, 25, 26, 27, 38, 42], "0001": [22, 24, 25, 26, 27, 38, 42], "1221": [22, 24, 25, 38, 42], "135766": [22, 24, 25, 38, 42], "0002": [22, 24, 25, 38, 42], "multipl": [22, 33, 35, 36, 38, 42, 46, 47, 49], "Its": [22, 24, 25, 26, 38], "233": [22, 24, 25], "265": 22, "3000": [22, 24, 25, 26], "80": [22, 24, 25, 26, 33, 35, 38], "subsampling_factor": [22, 25, 26, 33, 35, 38], "encoder_dim": [22, 24, 25, 26], "512": [22, 24, 25, 26, 33, 35, 38], "nhead": [22, 24, 26, 33, 35, 38, 41, 56], "dim_feedforward": [22, 24, 25, 35], "num_encoder_lay": [22, 24, 25, 26, 35], "decoder_dim": [22, 24, 25, 26], "joiner_dim": [22, 24, 25, 26], "model_warm_step": [22, 24, 25], "4810e00d8738f1a21278b0156a42ff396a2d40ac": 22, "oct": [22, 38], "miss": [22, 24, 25, 26, 35, 38], "cu102": [22, 24, 25, 26], "1013": 22, "c39cba5": 22, "dirti": [22, 24, 25, 33, 38], "ceph": [22, 33, 35, 38], "0324160024": 22, "65bfd8b584": 22, "jjlbn": 22, "bpe_model": [22, 24, 25, 26, 38], "16000": [22, 33, 35, 36, 38, 42, 43, 46, 47], "max_context": 22, "max_stat": 22, "context_s": [22, 24, 25, 26, 35], "max_sym_per_fram": [22, 35], "simulate_stream": 22, "decode_chunk_s": 22, "left_context": 22, "dynamic_chunk_train": 22, "causal_convolut": 22, "short_chunk_s": [22, 26, 56, 57], "num_left_chunk": [22, 26], "blank_id": [22, 24, 25, 26, 35], "unk_id": 22, "271": [22, 25], "612": 22, "458": 22, "giga": [22, 25, 55], "623": 22, "277": 22, "78648040": 22, "951": [22, 38], "285": [22, 35, 38], "952": 22, "295": [22, 33, 35, 36, 38], "957": 22, "301": [22, 38], "700": 22, "329": [22, 25, 38], "388": 22, "earli": [22, 24, 25, 26, 38, 42], 
"nightfal": [22, 24, 25, 26, 38, 42], "THE": [22, 24, 25, 26, 38, 42], "yellow": [22, 24, 25, 26, 38, 42], "lamp": [22, 24, 25, 26, 38, 42], "light": [22, 24, 25, 26, 38, 42], "AND": [22, 24, 25, 26, 38, 42], "THERE": [22, 24, 25, 26, 38, 42], "squalid": [22, 24, 25, 26, 38, 42], "quarter": [22, 24, 25, 26, 38, 42], "OF": [22, 24, 25, 26, 38, 42], "brothel": [22, 24, 25, 26, 38, 42], "god": [22, 38, 42], "AS": [22, 38, 42], "direct": [22, 38, 42], "consequ": [22, 38, 42], "sin": [22, 38, 42], "man": [22, 38, 42], "punish": [22, 38, 42], "had": [22, 38, 42], "her": [22, 38, 42], "love": [22, 38, 42], "child": [22, 38, 42], "whose": [22, 35, 38, 42], "ON": [22, 24, 38, 42], "THAT": [22, 38, 42], "dishonor": [22, 38, 42], "bosom": [22, 38, 42], "TO": [22, 38, 42], "parent": [22, 38, 42], "forev": [22, 38, 42], "WITH": [22, 38, 42], "race": [22, 38, 42], "descent": [22, 38, 42], "mortal": [22, 38, 42], "BE": [22, 38, 42], "bless": [22, 38, 42], "soul": [22, 38, 42], "IN": [22, 38, 42], "heaven": [22, 38, 42], "yet": [22, 24, 25, 38, 42], "THESE": [22, 38, 42], "thought": [22, 38, 42], "affect": [22, 38, 42], "hester": [22, 38, 42], "prynn": [22, 38, 42], "hope": [22, 34, 38, 42], "apprehens": [22, 38, 42], "390": 22, "down": [22, 33, 38, 41, 43, 44, 55, 56, 57], "reproduc": [22, 38], "9999": [22, 43, 44, 55], "symlink": 22, "pass": [22, 26, 33, 35, 36, 38, 41, 43, 44, 53, 55, 56, 57], "convemform": [23, 30, 53], "platform": [23, 27], "android": [23, 24, 25, 26, 27], "raspberri": [23, 27], "pi": [23, 27], "\u7231\u82af\u6d3e": 23, "maix": 23, "iii": 23, "axera": 23, "rv1126": 23, "static": 23, "binari": [23, 24, 25, 26, 33, 35, 36, 38, 41, 49, 55, 56], "pnnx": [23, 30], "encod": [23, 27, 29, 30, 33, 35, 36, 38, 41, 42, 43, 49, 53, 55, 56, 57], "conv": [24, 25], "emform": [24, 25, 28], "stateless2": [24, 25, 55], "pretrained_model": [24, 25, 26], "online_transduc": 24, "jit_xxx": [24, 25, 26], "anywher": [24, 25], "submodul": 24, "recurs": 24, "init": 24, 
"dcmake_build_typ": [24, 33, 38], "dncnn_python": 24, "dncnn_build_benchmark": 24, "dncnn_build_exampl": 24, "dncnn_build_tool": 24, "j4": 24, "pwd": 24, "src": [24, 26], "compon": [24, 53], "ncnn2int8": [24, 25], "am": 24, "sai": [24, 25, 26, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57], "later": [24, 25, 26, 33, 36, 38, 41, 42, 43, 44, 46, 47, 55, 56, 57], "termin": 24, "tencent": [24, 25], "modif": [24, 35], "offici": 24, "synchron": 24, "renam": [24, 25, 26], "conv_emformer_transducer_stateless2": [24, 53], "length": [24, 26, 35, 51, 56, 57], "cnn": [24, 26], "31": [24, 25, 26, 38], "context": [24, 35, 41, 53, 55, 56, 57], "configur": [24, 26, 35, 39, 42, 46, 47, 49, 59, 60], "accordingli": [24, 25, 26], "yourself": [24, 25, 26, 39, 56, 57], "220": [24, 35, 36, 38], "229": [24, 33], "best_v": 24, "alid_epoch": 24, "subsampl": [24, 56, 57], "ing_factor": 24, "a34171ed85605b0926eebbd0463d059431f4f74a": 24, "dec": 24, "ver": 24, "ion": 24, "530e8a1": 24, "op": 24, "1220120619": [24, 25, 26], "7695ff496b": [24, 25, 26], "s9n4w": [24, 25, 26], "icefa": 24, "ll": 24, "transdu": 24, "cer": 24, "use_averaged_model": [24, 25, 26], "cnn_module_kernel": [24, 26], "left_context_length": 24, "chunk_length": 24, "right_context_length": 24, "memory_s": 24, "231": [24, 25, 26], "053": 24, "022": 24, "708": [24, 33, 35, 38, 49], "75490012": 24, "320": [24, 35], "682": 24, "lh": [24, 25, 26], "rw": [24, 25, 26], "289m": 24, "jan": [24, 25, 26], "289": 24, "roughli": [24, 25, 26], "equal": [24, 25, 26, 56, 57], "1024": [24, 25, 26, 55], "287": [24, 49], "1010k": [24, 25], "decoder_jit_trac": [24, 25, 26, 29, 55, 57], "283m": 24, "encoder_jit_trac": [24, 25, 26, 29, 55, 57], "0m": [24, 25], "joiner_jit_trac": [24, 25, 26, 29, 55, 57], "sure": [24, 25, 26], "found": [24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "param": [24, 25, 26], "503k": [24, 25], "437": [24, 25, 26], "142m": 24, "79k": 24, "5m": [24, 25], "architectur": [24, 25, 26, 55], "editor": 
[24, 25, 26], "content": [24, 25, 26], "283": [24, 26], "1010": [24, 25], "503": [24, 25], "convers": [24, 25, 26], "half": [24, 25, 26, 41, 56, 57], "v": [24, 25, 26, 38, 46, 47], "float16": [24, 25, 26], "occupi": [24, 25, 26], "twice": [24, 25, 26], "smaller": [24, 25, 26, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "fp16": [24, 25, 26, 31, 41, 43, 44, 51, 55, 56, 57, 59, 60], "won": [24, 25, 26, 27, 33, 36, 38, 39, 41, 43, 44, 55, 56, 57], "accept": [24, 25, 26], "216": [24, 33, 38, 46, 47], "encoder_param_filenam": [24, 25, 26], "encoder_bin_filenam": [24, 25, 26], "decoder_param_filenam": [24, 25, 26], "decoder_bin_filenam": [24, 25, 26], "joiner_param_filenam": [24, 25, 26], "joiner_bin_filenam": [24, 25, 26], "sound_filenam": [24, 25, 26], "141": 24, "328": 24, "336": 24, "106000": [24, 25, 26, 38, 42], "581": [24, 42], "381": 24, "7767517": [24, 25, 26], "1060": 24, "1342": 24, "in0": [24, 25, 26], "explan": [24, 25, 26], "magic": [24, 25, 26], "intermedi": [24, 25, 26], "increment": [24, 25, 26], "1061": 24, "sherpametadata": [24, 25, 26], "sherpa_meta_data1": [24, 25, 26], "newli": [24, 25, 26], "must": [24, 25, 26, 56], "eas": [24, 25, 26], "pair": [24, 25, 26], "sad": [24, 25, 26], "rememb": [24, 25, 26], "anymor": [24, 25, 26], "flexibl": [24, 25, 26], "edit": [24, 25, 26], "arm": [24, 25, 26], "aarch64": [24, 25, 26], "onc": [24, 25], "mayb": [24, 25], "year": [24, 25], "_jit_trac": [24, 25], "fp32": [24, 25], "doubl": [24, 25], "j": [24, 25, 33, 38], "py38": [24, 25, 26], "arg": [24, 25], "wave_filenam": [24, 25], "16k": [24, 25], "hz": [24, 25, 46, 47], "mono": [24, 25], "calibr": [24, 25], "cat": [24, 25], "eof": [24, 25], "calcul": [24, 25, 43, 56, 57], "has_gpu": [24, 25], "config": [24, 25], "use_vulkan_comput": [24, 25], "88": [24, 35], "conv_87": 24, "942385": [24, 25], "threshold": [24, 25, 43], "938493": 24, "968131": 24, "conv_88": 24, "442448": 24, "549335": 24, "167552": 24, "conv_89": 24, "228289": 24, "001738": 24, "871552": 24, 
"linear_90": 24, "976146": 24, "101789": 24, "267128": 24, "linear_91": 24, "962030": 24, "162033": 24, "602713": 24, "linear_92": 24, "323041": 24, "853959": 24, "953129": 24, "linear_94": 24, "905416": 24, "648006": 24, "323545": 24, "linear_93": 24, "474093": 24, "200188": 24, "linear_95": 24, "888012": 24, "403563": 24, "483986": 24, "linear_96": 24, "856741": 24, "398679": 24, "524273": 24, "linear_97": 24, "635942": 24, "613655": 24, "590950": 24, "linear_98": 24, "460340": 24, "670146": 24, "398010": 24, "linear_99": 24, "532276": 24, "585537": 24, "119396": 24, "linear_101": 24, "585871": 24, "719224": 24, "205809": 24, "linear_100": 24, "751382": 24, "081648": 24, "linear_102": 24, "593344": 24, "450581": 24, "551147": 24, "linear_103": 24, "592681": 24, "705824": 24, "257959": 24, "linear_104": 24, "752957": 24, "980955": 24, "110489": 24, "linear_105": 24, "696240": 24, "877193": 24, "608953": 24, "linear_106": 24, "059659": 24, "643138": 24, "048950": 24, "linear_108": 24, "975461": 24, "589567": 24, "671457": 24, "linear_107": 24, "190381": 24, "515701": 24, "linear_109": 24, "710759": 24, "305635": 24, "082436": 24, "linear_110": 24, "531228": 24, "731162": 24, "159557": 24, "linear_111": 24, "528083": 24, "259322": 24, "211544": 24, "linear_112": 24, "148807": 24, "500842": 24, "087374": 24, "linear_113": 24, "592566": 24, "948851": 24, "166611": 24, "linear_115": 24, "437109": 24, "608947": 24, "642395": 24, "linear_114": 24, "193942": 24, "503904": 24, "linear_116": 24, "966980": 24, "200896": 24, "676392": 24, "linear_117": 24, "451303": 24, "061664": 24, "951344": 24, "linear_118": 24, "077262": 24, "965800": 24, "023804": 24, "linear_119": 24, "671615": 24, "847613": 24, "198460": 24, "linear_120": 24, "625638": 24, "131427": 24, "556595": 24, "linear_122": 24, "274080": 24, "888716": 24, "978189": 24, "linear_121": 24, "420480": 24, "429659": 24, "linear_123": 24, "826197": 24, "599617": 24, "281532": 24, "linear_124": 24, "396383": 24, 
"325849": 24, "335875": 24, "linear_125": 24, "337198": 24, "941410": 24, "221970": 24, "linear_126": 24, "699965": 24, "842878": 24, "224073": 24, "linear_127": 24, "775370": 24, "884215": 24, "696438": 24, "linear_129": 24, "872276": 24, "837319": 24, "254213": 24, "linear_128": 24, "180057": 24, "687883": 24, "linear_130": 24, "150427": 24, "454298": 24, "765789": 24, "linear_131": 24, "112692": 24, "924847": 24, "025545": 24, "linear_132": 24, "852893": 24, "116593": 24, "749626": 24, "linear_133": 24, "517084": 24, "024665": 24, "275314": 24, "linear_134": 24, "683807": 24, "878618": 24, "743618": 24, "linear_136": 24, "421055": 24, "322729": 24, "086264": 24, "linear_135": 24, "309880": 24, "917679": 24, "linear_137": 24, "827781": 24, "744595": 24, "915554": 24, "linear_138": 24, "422395": 24, "742882": 24, "402161": 24, "linear_139": 24, "527538": 24, "866123": 24, "849449": 24, "linear_140": 24, "128619": 24, "657793": 24, "266134": 24, "linear_141": 24, "839593": 24, "845993": 24, "021378": 24, "linear_143": 24, "442304": 24, "099039": 24, "889746": 24, "linear_142": 24, "325038": 24, "849592": 24, "linear_144": 24, "929444": 24, "618206": 24, "605080": 24, "linear_145": 24, "382126": 24, "321095": 24, "625010": 24, "linear_146": 24, "894987": 24, "867645": 24, "836517": 24, "linear_147": 24, "915313": 24, "906028": 24, "886522": 24, "linear_148": 24, "614287": 24, "908151": 24, "496181": 24, "linear_150": 24, "724932": 24, "485588": 24, "312899": 24, "linear_149": 24, "161146": 24, "606939": 24, "linear_151": 24, "164453": 24, "847355": 24, "719223": 24, "linear_152": 24, "086471": 24, "984121": 24, "222834": 24, "linear_153": 24, "099524": 24, "991601": 24, "816805": 24, "linear_154": 24, "054585": 24, "489706": 24, "286930": 24, "linear_155": 24, "389185": 24, "100321": 24, "963501": 24, "linear_157": 24, "982999": 24, "154796": 24, "637253": 24, "linear_156": 24, "537706": 24, "875190": 24, "linear_158": 24, "420287": 24, "502287": 24, "531588": 24, 
"linear_159": 24, "014746": 24, "423280": 24, "477261": 24, "linear_160": 24, "633553": 24, "715335": 24, "220921": 24, "linear_161": 24, "371849": 24, "117830": 24, "815203": 24, "linear_162": 24, "492933": 24, "126283": 24, "623318": 24, "linear_164": 24, "697504": 24, "825712": 24, "317358": 24, "linear_163": 24, "078367": 24, "008038": 24, "linear_165": 24, "023975": 24, "836278": 24, "577358": 24, "linear_166": 24, "860619": 24, "259792": 24, "493614": 24, "linear_167": 24, "380934": 24, "496160": 24, "107042": 24, "linear_168": 24, "691216": 24, "733317": 24, "831076": 24, "linear_169": 24, "723948": 24, "952728": 24, "129707": 24, "linear_171": 24, "034811": 24, "366547": 24, "665123": 24, "linear_170": 24, "356277": 24, "710501": 24, "linear_172": 24, "556884": 24, "729481": 24, "166058": 24, "linear_173": 24, "033039": 24, "207264": 24, "442120": 24, "linear_174": 24, "597379": 24, "658676": 24, "768131": 24, "linear_2": [24, 25], "293503": 24, "305265": 24, "877850": 24, "linear_1": [24, 25], "812222": 24, "766452": 24, "487047": 24, "linear_3": [24, 25], "999999": 24, "999755": 24, "031174": 24, "wish": [24, 25], "955k": 24, "18k": 24, "inparam": [24, 25], "inbin": [24, 25], "outparam": [24, 25], "outbin": [24, 25], "99m": 24, "78k": 24, "774k": [24, 25], "496": [24, 25, 38, 42], "replac": [24, 25], "774": [24, 25], "linear": [24, 25, 35], "convolut": [24, 25, 43, 53, 56], "exact": [24, 25], "4x": [24, 25], "comparison": 24, "468000": [25, 29, 55], "lstm_transducer_stateless2": [25, 29, 55], "862": 25, "222": [25, 36, 38], "865": 25, "is_pnnx": 25, "62e404dd3f3a811d73e424199b3408e309c06e1a": [25, 26], "6d7a559": [25, 26], "feb": [25, 26, 35], "147": [25, 26], "rnn_hidden_s": 25, "aux_layer_period": 25, "235": 25, "239": [25, 35], "472": 25, "595": 25, "324": 25, "83137520": 25, "596": 25, "325": 25, "257024": 25, "781812": 25, "327": 25, "84176356": 25, "182": [25, 26, 33, 42], "158": 25, "183": [25, 46, 47], "335": 25, "101": 25, "tracerwarn": [25, 26], 
"boolean": [25, 26], "caus": [25, 26, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "incorrect": [25, 26, 35], "flow": [25, 26], "constant": [25, 26], "futur": [25, 26, 35, 61], "need_pad": 25, "bool": 25, "259": [25, 33], "339": 25, "207": [25, 36, 38], "84": [25, 33], "324m": 25, "321": [25, 33], "107": [25, 42], "318m": 25, "159m": 25, "21k": 25, "159": [25, 38, 49], "37": [25, 33, 35, 38, 46], "861": 25, "266": [25, 26, 38, 42], "431": 25, "342": 25, "343": 25, "379": 25, "268": [25, 38, 42], "317m": 25, "317": 25, "conv_15": 25, "930708": 25, "972025": 25, "conv_16": 25, "978855": 25, "031788": 25, "456645": 25, "conv_17": 25, "868437": 25, "830528": 25, "218575": 25, "linear_18": 25, "107259": 25, "194808": 25, "293236": 25, "linear_19": 25, "193777": 25, "634748": 25, "401705": 25, "linear_20": 25, "259933": 25, "606617": 25, "722160": 25, "linear_21": 25, "186600": 25, "790260": 25, "512129": 25, "linear_22": 25, "759041": 25, "265832": 25, "050053": 25, "linear_23": 25, "931209": 25, "099090": 25, "979767": 25, "linear_24": 25, "324160": 25, "215561": 25, "321835": 25, "linear_25": 25, "800708": 25, "599352": 25, "284134": 25, "linear_26": 25, "492444": 25, "153369": 25, "274391": 25, "linear_27": 25, "660161": 25, "720994": 25, "674126": 25, "linear_28": 25, "415265": 25, "174434": 25, "007133": 25, "linear_29": 25, "038418": 25, "118534": 25, "724262": 25, "linear_30": 25, "072084": 25, "936867": 25, "259155": 25, "linear_31": 25, "342712": 25, "599489": 25, "282787": 25, "linear_32": 25, "340535": 25, "120308": 25, "701103": 25, "linear_33": 25, "846987": 25, "630030": 25, "985939": 25, "linear_34": 25, "686298": 25, "204571": 25, "607586": 25, "linear_35": 25, "904821": 25, "575518": 25, "756420": 25, "linear_36": 25, "806659": 25, "585589": 25, "118401": 25, "linear_37": 25, "402340": 25, "047157": 25, "162680": 25, "linear_38": 25, "174589": 25, "923361": 25, "030258": 25, "linear_39": 25, "178576": 25, "556058": 25, "807705": 25, "linear_40": 25, 
"901954": 25, "301267": 25, "956539": 25, "linear_41": 25, "839805": 25, "597429": 25, "716181": 25, "linear_42": 25, "178945": 25, "651595": 25, "895699": 25, "829245": 25, "627592": 25, "637907": 25, "746186": 25, "255032": 25, "167313": 25, "000000": 25, "999756": 25, "031013": 25, "345k": 25, "17k": 25, "218m": 25, "counterpart": 25, "bit": [25, 33, 35, 36, 38, 42, 49], "4532": 25, "feedforward": [26, 35, 41, 56], "384": [26, 38], "unmask": 26, "downsampl": [26, 34], "factor": [26, 33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "473": [26, 38], "246": [26, 35, 38, 46, 47], "477": 26, "warm_step": 26, "2000": [26, 36], "feedforward_dim": 26, "attention_dim": [26, 33, 35, 38], "encoder_unmasked_dim": 26, "zipformer_downsampling_factor": 26, "decode_chunk_len": 26, "257": [26, 35, 46, 47], "023": 26, "zipformer2": 26, "419": 26, "At": [26, 33, 38], "stack": 26, "downsampling_factor": 26, "037": 26, "655": 26, "346": 26, "68944004": 26, "347": 26, "260096": 26, "348": [26, 46], "716276": 26, "656": [26, 38], "349": 26, "69920376": 26, "351": 26, "353": 26, "174": [26, 38], "175": 26, "1344": 26, "assert": 26, "cached_len": 26, "num_lay": 26, "1348": 26, "cached_avg": 26, "1352": 26, "cached_kei": 26, "1356": 26, "cached_v": 26, "1360": 26, "cached_val2": 26, "1364": 26, "cached_conv1": 26, "1368": 26, "cached_conv2": 26, "1373": 26, "left_context_len": 26, "1884": 26, "x_size": 26, "2442": 26, "2449": 26, "2469": 26, "2473": 26, "2483": 26, "kv_len": 26, "k": [26, 41, 46, 47, 55, 56, 57], "2570": 26, "attn_output": 26, "bsz": 26, "num_head": 26, "seq_len": 26, "head_dim": 26, "2926": 26, "lorder": 26, "2652": 26, "2653": 26, "embed_dim": 26, "2666": 26, "1543": 26, "in_x_siz": 26, "1637": 26, "1643": 26, "in_channel": 26, "1571": 26, "1763": 26, "src1": 26, "src2": 26, "1779": 26, "dim1": 26, "1780": 26, "dim2": 26, "_trace": 26, "958": 26, "tracer": 26, "instead": [26, 35, 56], "tupl": 26, "namedtupl": 26, "absolut": 26, "know": [26, 39], "side": 26, "allow": [26, 
41, 56], "behavior": [26, 35], "_c": 26, "_create_method_from_trac": 26, "646": 26, "357": 26, "102": [26, 33], "embedding_out": 26, "686": 26, "361": [26, 38, 42], "735": 26, "69": 26, "269m": 26, "269": [26, 33, 46, 47], "725": [26, 42], "1022k": 26, "266m": 26, "8m": 26, "509k": 26, "133m": 26, "152k": 26, "4m": 26, "1022": 26, "133": 26, "509": 26, "360": 26, "365": 26, "280": [26, 38], "372": [26, 33], "state": [26, 33, 35, 36, 38, 41, 43, 44, 51, 55, 56, 57], "026": 26, "410": 26, "411": [26, 38], "2028": 26, "2547": 26, "2029": 26, "23316": 26, "23317": 26, "23318": 26, "23319": 26, "23320": 26, "amount": [26, 32, 34], "pad": [26, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "conv2dsubsampl": 26, "arrai": 26, "23300": 26, "element": 26, "repo_url": 27, "basenam": 27, "why": 28, "streaming_asr": [28, 29, 55, 56, 57], "conv_emform": 28, "offline_asr": [28, 41], "baz": 29, "subset": [31, 38, 41, 43, 44, 55, 56, 57], "instruct": 31, "full": [31, 38, 39, 41, 43, 44, 55, 56, 57], "intial": 31, "decode_gigaspeech": 31, "1000": [31, 38, 59, 60], "whole": [31, 38, 42, 46, 47, 56, 57], "previou": [31, 51], "stateless": [31, 34, 37, 41, 55, 56, 57], "due": [31, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "vocabulari": [31, 35], "experi": [31, 33, 35, 36, 38, 39, 41, 43, 44, 49, 55, 56, 57], "use_mux": 31, "do_finetun": 31, "world": [31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 51, 55, 56, 57, 59, 60], "exp_giga_finetun": 31, "_mux": 31, "0045": 31, "mux": 31, "13024": 31, "ckpt": 31, "forget": 31, "quickli": 31, "certain": [31, 32], "mix": 31, "maintain": 31, "ones": 31, "lower": [31, 55], "public": 32, "capabl": 32, "high": [32, 34, 59], "label": 32, "1best": [33, 36, 38, 42, 43, 44, 46, 47], "automag": [33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "stop": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "By": [33, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "musan": [33, 36, 38, 39, 41, 43, 44, 55, 56, 57], "apt": [33, 36], "permiss": [33, 36], 
"commandlin": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "multi": [33, 35, 36, 38, 39, 41, 43, 44, 53, 55, 56, 57], "machin": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "ddp": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "implement": [33, 35, 36, 38, 39, 41, 43, 44, 53, 55, 56, 57], "utter": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "oom": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "decai": [33, 36, 38, 43, 44, 55], "warmup": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "function": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "get_param": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "directli": [33, 35, 36, 38, 39, 41, 43, 44, 55, 56, 57], "perturb": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "3x150": [33, 35, 36], "450": [33, 35, 36], "visual": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "logdir": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "labelsmooth": 33, "tensorflow": [33, 35, 36, 38, 41, 43, 44, 49, 55, 56], "press": [33, 35, 36, 38, 41, 43, 44, 49, 55, 56, 57], "ctrl": [33, 35, 36, 38, 41, 43, 44, 49, 55, 56, 57], "engw8ksktzqs24zbv5dgcg": 33, "2021": [33, 36, 38, 42, 46, 47, 49], "22t11": 33, "scan": [33, 35, 36, 38, 41, 49, 55, 56], "116068": 33, "scalar": [33, 35, 36, 38, 41, 49, 55, 56], "listen": [33, 35, 36, 41, 49, 55, 56], "xxxx": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "saw": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "consol": [33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "avoid": [33, 35, 38], "nbest": [33, 38, 44], "lattic": [33, 36, 38, 41, 42, 46, 47, 56, 57], "uniqu": [33, 38, 41, 56, 57], "pkufool": [33, 36, 42], "icefall_asr_aishell_conformer_ctc": 33, "transcrib": [33, 35, 36, 38], "lang_char": [33, 35], "bac009s0764w0121": [33, 35, 36], "bac009s0764w0122": [33, 35, 36], "bac009s0764w0123": [33, 35, 36], "tran": [33, 36, 38, 42, 46, 47], "graph": [33, 36, 38, 41, 42, 46, 47, 56, 57], "conveni": [33, 36, 38, 39], "eo": [33, 36, 38], "soxi": [33, 35, 36, 
38, 42, 49], "sampl": [33, 35, 36, 38, 42, 43, 49, 56, 57], "precis": [33, 35, 36, 38, 41, 42, 49, 56, 57], "67263": [33, 35, 36], "cdda": [33, 35, 36, 38, 42, 49], "sector": [33, 35, 36, 38, 42, 49], "135k": [33, 35, 36], "256k": [33, 35, 36, 38], "sign": [33, 35, 36, 38, 49], "integ": [33, 35, 36, 38, 49], "pcm": [33, 35, 36, 38, 49], "65840": [33, 35, 36], "308": [33, 35, 36], "625": [33, 35, 36], "132k": [33, 35, 36], "64000": [33, 35, 36], "300": [33, 35, 36, 38, 39, 41, 51, 56], "128k": [33, 35, 36, 49], "displai": [33, 35, 36, 38], "topologi": [33, 38], "num_decoder_lay": [33, 38], "vgg_frontend": [33, 35, 38], "use_feat_batchnorm": [33, 38], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 33, "sun": 33, "sep": 33, "33cfe45": 33, "d57a873": 33, "nov": [33, 38], "hw": 33, "kangwei": 33, "icefall_aishell3": 33, "k2_releas": 33, "tokens_fil": 33, "num_path": [33, 38, 41, 56, 57], "ngram_lm_scal": [33, 38], "attention_decoder_scal": [33, 38], "nbest_scal": [33, 38], "sos_id": [33, 38], "eos_id": [33, 38], "4336": [33, 35], "131": [33, 38], "293": [33, 38], "369": [33, 38], "\u751a": [33, 35], "\u81f3": [33, 35], "\u51fa": [33, 35], "\u73b0": [33, 35], "\u4ea4": [33, 35], "\u6613": [33, 35], "\u51e0": [33, 35], "\u4e4e": [33, 35], "\u505c": [33, 35], "\u6b62": 33, "\u7684": [33, 35, 36], "\u60c5": [33, 35], "\u51b5": [33, 35], "\u4e00": [33, 35], "\u4e8c": [33, 35], "\u7ebf": [33, 35, 36], "\u57ce": [33, 35], "\u5e02": [33, 35], "\u867d": [33, 35], "\u7136": [33, 35], "\u4e5f": [33, 35, 36], "\u5904": [33, 35], "\u4e8e": [33, 35], "\u8c03": [33, 35], "\u6574": [33, 35], "\u4e2d": [33, 35, 36], "\u4f46": [33, 35, 36], "\u56e0": [33, 35], "\u4e3a": [33, 35], "\u805a": [33, 35], "\u96c6": [33, 35], "\u4e86": [33, 35, 36], "\u8fc7": [33, 35], "\u591a": [33, 35], "\u516c": [33, 35], "\u5171": [33, 35], "\u8d44": [33, 35], "\u6e90": [33, 35], "371": 33, "683": 33, "684": [33, 49], "651": [33, 49], "654": 33, "659": 33, "752": 33, "887": 33, "340": 33, "370": 33, 
"\u751a\u81f3": [33, 36], "\u51fa\u73b0": [33, 36], "\u4ea4\u6613": [33, 36], "\u51e0\u4e4e": [33, 36], "\u505c\u6b62": 33, "\u60c5\u51b5": [33, 36], "\u4e00\u4e8c": [33, 36], "\u57ce\u5e02": [33, 36], "\u867d\u7136": [33, 36], "\u5904\u4e8e": [33, 36], "\u8c03\u6574": [33, 36], "\u56e0\u4e3a": [33, 36], "\u805a\u96c6": [33, 36], "\u8fc7\u591a": [33, 36], "\u516c\u5171": [33, 36], "\u8d44\u6e90": [33, 36], "recor": [33, 38], "highest": [33, 38], "966": 33, "821": 33, "822": 33, "826": 33, "916": 33, "345": 33, "889": 33, "limit": [33, 35, 38, 53, 56], "upgrad": [33, 38], "NOT": [33, 35, 38, 49], "checkout": [33, 38], "hlg_decod": [33, 38], "four": [33, 38], "messag": [33, 38, 41, 43, 44, 55, 56, 57], "use_gpu": [33, 38], "word_tabl": [33, 38], "forward": [33, 38, 43], "cu": [33, 38], "int": [33, 38], "char": [33, 38], "98": 33, "150": [33, 38], "693": [33, 46], "165": [33, 38], "nnet_output": [33, 38], "185": [33, 38, 49], "217": [33, 38], "mandarin": 34, "beij": 34, "shell": 34, "technologi": 34, "ltd": 34, "peopl": 34, "accent": 34, "area": 34, "china": 34, "invit": 34, "particip": 34, "conduct": 34, "indoor": 34, "fidel": 34, "microphon": 34, "16khz": 34, "manual": 34, "through": 34, "profession": 34, "annot": 34, "inspect": 34, "free": [34, 39, 51, 55], "academ": 34, "moder": 34, "research": 34, "openslr": [34, 51], "ctc": [34, 37, 40, 44, 45, 48], "conv1d": [35, 41, 55, 56, 57], "tanh": 35, "borrow": 35, "ieeexplor": 35, "ieee": 35, "stamp": 35, "jsp": 35, "arnumb": 35, "9054419": 35, "predict": [35, 39, 41, 55, 56, 57], "charact": 35, "unit": 35, "87939824": 35, "optimized_transduc": 35, "technqiu": 35, "end": [35, 41, 43, 44, 49, 55, 56, 57, 59, 60], "maximum": 35, "emit": 35, "simplifi": [35, 53], "significantli": 35, "degrad": 35, "exactli": 35, "unprun": 35, "advantag": 35, "minim": 35, "pruned_transducer_stateless": [35, 41, 53, 56], "altern": 35, "though": 35, "transducer_stateless_modifi": 35, "pr": 35, "ram": 35, "tri": 35, "prob": [35, 55], "219": 
[35, 38], "lagz6hrcqxoigbfd5e0y3q": 35, "03t14": 35, "8477": 35, "250": [35, 42], "sym": [35, 41, 56, 57], "beam_search": [35, 41, 56, 57], "decoding_method": 35, "beam_4": 35, "ensur": 35, "poor": 35, "531": [35, 36], "994": [35, 38], "027": 35, "encoder_out_dim": 35, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 35, "50d2281": 35, "mar": 35, "0815224919": 35, "75d558775b": 35, "mmnv8": 35, "72": [35, 38], "878": [35, 47], "880": 35, "891": 35, "113": [35, 38], "userwarn": 35, "__floordiv__": 35, "round": 35, "toward": 35, "trunc": 35, "floor": 35, "keep": [35, 41, 56, 57], "div": 35, "b": [35, 38, 46, 47], "rounding_mod": 35, "divis": 35, "x_len": 35, "163": [35, 38], "\u6ede": 35, "322": 35, "759": 35, "760": 35, "919": 35, "922": 35, "929": 35, "046": 35, "319": [35, 38], "798": 35, "831": [35, 47], "215": [35, 38, 42], "402": 35, "topk_hyp_index": 35, "topk_index": 35, "logit": 35, "583": [35, 47], "lji9mwuorlow3jkdhxwk8a": 36, "13t11": 36, "4454": 36, "icefall_asr_aishell_tdnn_lstm_ctc": 36, "858": [36, 38], "389": [36, 38], "161": [36, 38], "536": 36, "539": 36, "917": 36, "\u505c\u6ede": 36, "mmi": [37, 40], "blank": [37, 40], "skip": [37, 39, 40, 41, 55, 56, 57], "distil": [37, 40], "hubert": [37, 40], "ligru": [37, 45], "libri": [38, 39, 41, 43, 44, 55, 56, 57], "3x960": [38, 41, 43, 44, 55, 56, 57], "2880": [38, 41, 43, 44, 55, 56, 57], "lzgnetjwrxc3yghnmd4kpw": 38, "24t16": 38, "4540": 38, "sentenc": [38, 51], "piec": 38, "And": [38, 41, 43, 44, 55, 56, 57], "neither": 38, "nor": 38, "5000": 38, "033": 38, "537": 38, "full_libri": [38, 39], "464": 38, "548": 38, "776": 38, "652": [38, 49], "109226120": 38, "714": [38, 46], "206": 38, "944": 38, "1328": 38, "443": [38, 42], "2563": 38, "494": 38, "592": 38, "1715": 38, "52576": 38, "128": 38, "1424": 38, "807": 38, "506": 38, "808": [38, 46], "362": 38, "1477": 38, "2922": 38, "4295": 38, "52343": 38, "396": 38, "3584": 38, "432": 38, "680": [38, 46], "_pickl": 38, "unpicklingerror": 38, "invalid": 38, 
"hlg_modifi": 38, "g_4_gram": [38, 42, 46, 47], "sentencepiec": 38, "875": [38, 42], "212k": 38, "267440": [38, 42], "1253": [38, 42], "535k": 38, "77200": [38, 42], "154k": 38, "554": 38, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 38, "8d93169": 38, "601": 38, "025": 38, "broffel": 38, "osom": 38, "723": 38, "775": 38, "881": 38, "571": 38, "857": 38, "979": 38, "055": 38, "117": 38, "051": 38, "363": 38, "959": [38, 47], "546": 38, "598": 38, "599": [38, 42], "833": 38, "834": 38, "915": 38, "076": 38, "110": 38, "397": 38, "999": [38, 41, 56, 57], "concaten": 38, "bucket": 38, "sampler": 38, "ctc_decod": 38, "ngram_lm_rescor": 38, "attention_rescor": 38, "105": 38, "125": [38, 49], "228": 38, "543": 38, "topo": 38, "547": 38, "729": 38, "703": 38, "545": 38, "122": 38, "126": 38, "135": [38, 49], "153": [38, 49], "945": 38, "475": 38, "191": [38, 46, 47], "398": 38, "515": 38, "deseri": 38, "441": 38, "fsaclass": 38, "loadfsa": 38, "const": 38, "string": 38, "c10": 38, "ignor": 38, "589": 38, "attention_scal": 38, "162": 38, "169": [38, 46, 47], "188": 38, "984": 38, "624": 38, "519": [38, 47], "632": 38, "645": [38, 49], "243": 38, "303": 38, "179": 38, "knowledg": 39, "vector": 39, "mvq": 39, "kd": 39, "pruned_transducer_stateless4": [39, 41, 53, 56], "theoret": 39, "applic": 39, "minor": 39, "stop_stag": [39, 59, 60], "thing": 39, "distillation_with_hubert": 39, "Of": 39, "cours": 39, "xl": 39, "proce": 39, "960h": [39, 43], "use_extracted_codebook": 39, "augment": 39, "th": [39, 46, 47], "embedding_lay": 39, "num_codebook": 39, "under": [39, 51], "vq_fbank_layer36_cb8": 39, "whola": 39, "snippet": 39, "echo": 39, "awk": 39, "split": 39, "pruned_transducer_stateless6": 39, "12359": 39, "spec": 39, "warp": 39, "paid": 39, "suitabl": [41, 55, 56, 57], "pruned_transducer_stateless2": [41, 53, 56], "pruned_transducer_stateless5": [41, 53, 56], "scroll": [41, 43, 44, 55, 56, 57], "arxiv": [41, 55, 56, 57], "2206": [41, 55, 56, 57], "13236": [41, 55, 56, 57], 
"rework": [41, 53, 56], "daniel": [41, 56, 57], "joint": [41, 55, 56, 57], "contrari": [41, 55, 56, 57], "convent": [41, 55, 56, 57], "recurr": [41, 55, 56, 57], "2x": [41, 56, 57], "littl": [41, 56], "436000": [41, 43, 44, 55, 56, 57], "438000": [41, 43, 44, 55, 56, 57], "qogspbgsr8kzcrmmie9jgw": 41, "20t15": [41, 55, 56], "4468": [41, 55, 56], "210171": [41, 55, 56], "access": [41, 43, 44, 55, 56, 57], "googl": [41, 43, 44, 55, 56, 57], "6008": [41, 43, 44, 55, 56, 57], "localhost": [41, 43, 44, 55, 56, 57], "expos": [41, 43, 44, 55, 56, 57], "proxi": [41, 43, 44, 55, 56, 57], "bind_al": [41, 43, 44, 55, 56, 57], "fast_beam_search": [41, 43, 55, 56, 57], "474000": [41, 55, 56, 57], "largest": [41, 56, 57], "posterior": [41, 43, 56, 57], "algorithm": [41, 56, 57], "pdf": [41, 44, 56, 57], "1211": [41, 56, 57], "3711": [41, 56, 57], "espnet": [41, 56, 57], "net": [41, 56, 57], "beam_search_transduc": [41, 56, 57], "basic": [41, 56], "topk": [41, 56, 57], "expand": [41, 56, 57], "mode": [41, 56, 57], "being": [41, 56, 57], "hardcod": [41, 56, 57], "composit": [41, 56, 57], "log_prob": [41, 56, 57], "hard": [41, 53, 56, 57], "2211": [41, 56, 57], "00484": [41, 56, 57], "fast_beam_search_lg": [41, 56, 57], "trivial": [41, 56, 57], "fast_beam_search_nbest": [41, 56, 57], "random_path": [41, 56, 57], "shortest": [41, 56, 57], "fast_beam_search_nbest_lg": [41, 56, 57], "logic": [41, 56, 57], "smallest": [41, 55, 56, 57], "normal": [42, 46, 47, 49, 56], "icefall_asr_librispeech_tdnn": 42, "lstm_ctc": 42, "flac": 42, "116k": 42, "140k": 42, "343k": 42, "164k": 42, "105k": 42, "174k": 42, "pretraind": 42, "584": [42, 47], "791": 42, "245": 42, "098": 42, "099": 42, "methond": [42, 46, 47], "631": 42, "010": 42, "guidanc": 43, "bigger": 43, "simpli": 43, "discard": 43, "prevent": 43, "lconv": 43, "encourag": [43, 44, 55], "stabil": [43, 44], "doesn": 43, "warm": [43, 44], "xyozukpeqm62hbilud4upa": [43, 44], "ctc_guide_decode_b": 43, "pretrained_ctc": 43, 
"jit_pretrained_ctc": 43, "100h": 43, "yfyeung": 43, "wechat": 44, "zipformer_mmi": 44, "worker": [44, 55], "hp": 44, "tdnn_ligru_ctc": 46, "enough": [46, 47, 49, 51], "luomingshuang": [46, 47], "icefall_asr_timit_tdnn_ligru_ctc": 46, "pretrained_average_9_25": 46, "fdhc0_si1559": [46, 47], "felc0_si756": [46, 47], "fmgd0_si1564": [46, 47], "ffprobe": [46, 47], "show_format": [46, 47], "nistspher": [46, 47], "database_id": [46, 47], "database_vers": [46, 47], "utterance_id": [46, 47], "dhc0_si1559": [46, 47], "sample_min": [46, 47], "4176": [46, 47], "sample_max": [46, 47], "5984": [46, 47], "bitrat": [46, 47], "pcm_s16le": [46, 47], "s16": [46, 47], "elc0_si756": [46, 47], "1546": [46, 47], "1989": [46, 47], "mgd0_si1564": [46, 47], "7626": [46, 47], "10573": [46, 47], "660": 46, "695": 46, "697": 46, "819": 46, "829": 46, "sil": [46, 47], "dh": [46, 47], "ih": [46, 47], "uw": [46, 47], "ah": [46, 47], "ii": [46, 47], "z": [46, 47], "aa": [46, 47], "ei": [46, 47], "dx": [46, 47], "d": [46, 47, 51], "uh": [46, 47], "ng": [46, 47], "eh": [46, 47], "jh": [46, 47], "er": [46, 47], "ai": [46, 47], "hh": [46, 47], "aw": 46, "ae": [46, 47], "705": 46, "715": 46, "720": 46, "251": [46, 47], "ch": 46, "icefall_asr_timit_tdnn_lstm_ctc": 47, "pretrained_average_16_25": 47, "816": 47, "827": 47, "unk": 47, "739": 47, "977": 47, "978": 47, "981": 47, "ow": 47, "ykubhb5wrmosxykid1z9eg": 49, "23t23": 49, "icefall_asr_yesno_tdnn": 49, "0_0_1_0_0_1_1_1": 49, "0_0_1_0_1_0_0_1": 49, "0_0_1_1_0_0_0_1": 49, "0_0_1_1_0_1_1_0": 49, "0_0_1_1_1_0_0_0": 49, "0_0_1_1_1_1_0_0": 49, "0_1_0_0_0_1_0_0": 49, "0_1_0_0_1_0_1_0": 49, "0_1_0_1_0_0_0_0": 49, "0_1_0_1_1_1_0_0": 49, "0_1_1_0_0_1_1_1": 49, "0_1_1_1_0_0_1_0": 49, "0_1_1_1_1_0_1_0": 49, "1_0_0_0_0_0_0_0": 49, "1_0_0_0_0_0_1_1": 49, "1_0_0_1_0_1_1_1": 49, "1_0_1_1_0_1_1_1": 49, "1_0_1_1_1_1_0_1": 49, "1_1_0_0_0_1_1_1": 49, "1_1_0_0_1_0_1_1": 49, "1_1_0_1_0_1_0_0": 49, "1_1_0_1_1_0_0_1": 49, "1_1_0_1_1_1_1_0": 49, "1_1_1_0_0_1_0_1": 49, 
"1_1_1_0_1_0_1_0": 49, "1_1_1_1_0_0_1_0": 49, "1_1_1_1_1_0_0_0": 49, "1_1_1_1_1_1_1_1": 49, "54080": 49, "507": 49, "108k": 49, "119": 49, "650": 49, "139": 49, "143": 49, "198": 49, "181": 49, "186": 49, "187": 49, "correctli": 49, "simplest": 49, "nnlm": 51, "complet": 51, "wget": 51, "resourc": 51, "norm": 51, "gzip": 51, "prepare_lm_training_data": 51, "lm_data": 51, "grab": 51, "cup": 51, "coffe": 51, "sort_lm_training_data": 51, "sorted_lm_data": 51, "statist": 51, "lm_data_stat": 51, "aforement": 51, "repeat": 51, "rnn_lm": 51, "tie": 51, "hyper": [51, 59, 60], "coupl": [51, 59, 60], "dai": [51, 59, 60], "former": 53, "mask": [53, 56, 57], "wenet": 53, "did": 53, "request": 53, "complic": 53, "techniqu": 53, "bank": 53, "memor": 53, "histori": 53, "introduc": 53, "variant": 53, "pruned_stateless_emformer_rnnt2": 53, "conv_emformer_transducer_stateless": 53, "ourself": 53, "mechan": 53, "onlin": 55, "lstm_transducer_stateless": 55, "prepare_giga_speech": 55, "cj2vtpiwqhkn9q1tx6ptpg": 55, "dynam": [56, 57], "causal": 56, "short": [56, 57], "2012": 56, "05481": 56, "flag": 56, "indic": [56, 57], "whether": 56, "sequenc": [56, 57], "uniformli": [56, 57], "seen": [56, 57], "97vkxf80ru61cnp2alwzzg": 56, "streaming_decod": [56, 57], "wise": [56, 57], "parallel": [56, 57], "bath": [56, 57], "parallelli": [56, 57], "seem": 56, "benefit": 56, "320m": 57, "550": 57, "basicli": 57, "scriptmodul": 57, "jit_trace_export": 57, "jit_trace_pretrain": 57, "monoton": 58, "align": 58, "condit": [59, 60], "variat": [59, 60], "autoencod": [59, 60], "adversari": [59, 60], "monotonic_align": [59, 60], "build_ext": [59, 60], "inplac": [59, 60], "medium": 59, "ground": [59, 60], "truth": [59, 60], "test_onnx": [59, 60], "2024": 59, "350": 60, "zrjin": 60, "synthesi": 61, "task": 61}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": [0, 9], "style": 0, "contribut": [1, 3], "document": 1, "how": [2, 22, 28, 29], "creat": [2, 13, 21], "recip": [2, 61], 
"data": [2, 9, 11, 21, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "prepar": [2, 9, 11, 21, 31, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "train": [2, 9, 16, 18, 21, 24, 25, 26, 27, 31, 32, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "decod": [2, 5, 6, 7, 9, 12, 21, 22, 27, 33, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "pre": [2, 18, 24, 25, 26, 27, 31, 32, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57], "model": [2, 5, 15, 18, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 51, 55, 56, 57, 59, 60], "lodr": [4, 6], "rnn": [4, 50, 51], "transduc": [4, 6, 7, 24, 25, 26, 35, 41, 55, 56, 57], "wer": [4, 6, 7, 38], "differ": [4, 6, 7], "beam": [4, 6, 7, 35], "size": [4, 6, 7], "languag": [5, 51], "lm": [6, 38, 50], "rescor": [6, 33, 38], "base": 6, "method": 6, "v": 6, "shallow": [6, 7], "fusion": [6, 7], "The": [6, 35], "number": 6, "each": 6, "field": 6, "i": 6, "test": [6, 7, 21, 24, 25, 26], "clean": [6, 7], "other": 6, "time": [6, 7], "docker": [8, 9], "introduct": [9, 53], "view": 9, "avail": 9, "tag": 9, "cuda": [9, 21], "enabl": 9, "imag": 9, "cpu": 9, "onli": 9, "download": [9, 11, 21, 24, 25, 26, 27, 33, 35, 36, 38, 41, 42, 43, 44, 46, 47, 49, 55, 56, 57, 59, 60], "run": [9, 22], "gpu": 9, "yesno": [9, 48], "within": 9, "contain": 9, "updat": 9, "frequent": 10, "ask": 10, "question": 10, "faq": 10, "oserror": 10, "libtorch_hip": 10, "so": 10, "cannot": 10, "open": 10, "share": 10, "object": 10, "file": [10, 11, 27], "directori": 10, "attributeerror": 10, "modul": 10, "distutil": 10, "ha": 10, "attribut": 10, "version": 10, "importerror": 10, "libpython3": 10, "10": 10, "1": [10, 21, 24, 25, 26, 33, 35, 36, 38], "0": [10, 21], "No": 10, "For": [11, 12, 13, 15, 16], "more": [11, 12, 13, 15, 16], "curiou": [11, 12, 13, 15, 16], "A": 11, "quick": 11, "look": 11, "gener": 11, "environ": [13, 21], "setup": 13, "virtual": 
[13, 21], "instal": [13, 21, 24, 25, 26, 33, 35, 36, 38, 42, 46, 47], "depend": 13, "icefal": [13, 14, 20, 21, 24, 25, 26], "dummi": 14, "tutori": 14, "export": [15, 22, 23, 24, 25, 26, 27, 28, 29, 30, 41, 43, 44, 55, 56, 57, 59, 60], "paramet": 15, "via": [15, 24, 25, 26], "state_dict": [15, 22, 41, 43, 44, 55, 56, 57], "torch": [15, 21, 24, 25, 26, 28, 29, 41, 43, 44, 55, 56, 57], "jit": [15, 24, 25, 26, 28, 29, 41, 43, 44, 55, 56, 57], "script": [15, 28, 41, 43, 44, 56, 57], "onnx": [15, 27], "huggingfac": [17, 19], "space": 19, "youtub": [19, 21], "video": [19, 21], "content": [20, 32, 61], "toolkit": 21, "cudnn": 21, "torchaudio": 21, "2": [21, 24, 25, 26, 33, 35, 36, 38], "k2": 21, "3": [21, 24, 25, 26, 33, 35, 38], "lhots": 21, "4": [21, 24, 25, 26], "exampl": [21, 27, 33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "5": [21, 24, 25, 26], "6": [21, 24, 25, 26], "your": 21, "when": [22, 28, 29], "us": [22, 28, 29, 41, 43, 44, 55, 56, 57], "py": 22, "ncnn": [23, 24, 25, 26], "convemform": 24, "pnnx": [24, 25, 26], "trace": [24, 25, 26, 29, 55, 57], "torchscript": [24, 25, 26], "modifi": [24, 25, 26, 35], "encod": [24, 25, 26], "sherpa": [24, 25, 26, 27, 41, 56, 57], "7": [24, 25], "option": [24, 25, 33, 36, 38, 41, 43, 44, 55, 56, 57], "int8": [24, 25], "quantiz": [24, 25], "lstm": [25, 36, 42, 47, 55], "stream": [26, 37, 52, 53, 56, 57], "zipform": [26, 31, 43, 44, 57], "sound": 27, "finetun": 31, "from": 31, "supervis": 31, "fine": [31, 32], "tune": [31, 32], "tabl": [32, 61], "conform": [33, 38, 53], "ctc": [33, 36, 38, 42, 43, 46, 47, 49], "configur": [33, 36, 38, 41, 43, 44, 55, 56, 57], "log": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "usag": [33, 35, 36, 38, 41, 43, 44, 55, 56, 57], "case": [33, 35, 36, 38], "kaldifeat": [33, 35, 36, 38, 42, 46, 47, 49], "hlg": [33, 36, 38], "attent": [33, 38], "colab": [33, 35, 36, 38, 42, 46, 47, 49], "notebook": [33, 35, 36, 38, 42, 46, 47, 49], "deploy": [33, 38], "c": [33, 38], "aishel": 34, "stateless": 35, "loss": 35, 
"todo": 35, "greedi": 35, "search": [35, 59, 60], "tdnn": [36, 42, 46, 47, 49], "non": 37, "asr": [37, 52], "comput": 38, "n": 38, "gram": 38, "distil": 39, "hubert": 39, "codebook": 39, "index": 39, "librispeech": [40, 54], "prune": [41, 56], "statelessx": [41, 56], "pretrain": [41, 43, 44, 55, 56, 57, 59, 60], "deploi": [41, 56, 57], "infer": [42, 46, 47, 49, 59, 60], "blank": 43, "skip": 43, "mmi": 44, "timit": 45, "ligru": 46, "an": 51, "emform": 53, "which": 55, "simul": [56, 57], "real": [56, 57], "tt": 58, "vit": [59, 60], "ljspeech": 59, "build": [59, 60], "monoton": [59, 60], "align": [59, 60], "vctk": 60}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 58}, "alltitles": {"Follow the code style": [[0, "follow-the-code-style"]], "Contributing to Documentation": [[1, "contributing-to-documentation"]], "How to create a recipe": [[2, "how-to-create-a-recipe"]], "Data Preparation": [[2, "data-preparation"], [11, "data-preparation"], [35, "data-preparation"]], "Training": [[2, "training"], [9, "training"], [16, "training"], [21, "training"], [33, "training"], [35, "training"], [36, "training"], [38, "training"], [39, "training"], [41, "training"], [42, "training"], [43, "training"], [44, "training"], [46, "training"], [47, "training"], [49, "training"], [55, "training"], [56, "training"], [57, "training"], [59, "training"], [60, "training"]], "Decoding": [[2, "decoding"], [9, "decoding"], [12, "decoding"], [21, "decoding"], [33, "decoding"], [35, "decoding"], [36, "decoding"], [38, "decoding"], [39, "decoding"], [41, "decoding"], [42, "decoding"], [43, "decoding"], [44, "decoding"], [46, "decoding"], [47, "decoding"], [49, "decoding"], [55, "decoding"], [56, "decoding"], [57, "decoding"]], 
"Pre-trained model": [[2, "pre-trained-model"]], "Contributing": [[3, "contributing"]], "LODR for RNN Transducer": [[4, "lodr-for-rnn-transducer"]], "WER of LODR with different beam sizes": [[4, "id1"]], "Decoding with language models": [[5, "decoding-with-language-models"]], "LM rescoring for Transducer": [[6, "lm-rescoring-for-transducer"]], "WERs of LM rescoring with different beam sizes": [[6, "id1"]], "WERs of LM rescoring + LODR with different beam sizes": [[6, "id2"]], "LM-rescoring-based methods vs shallow-fusion-based methods (The numbers in each field is WER on test-clean, WER on test-other and decoding time on test-clean)": [[6, "id3"]], "Shallow fusion for Transducer": [[7, "shallow-fusion-for-transducer"]], "WERs and decoding time (on test-clean) of shallow fusion with different beam sizes": [[7, "id2"]], "Docker": [[8, "docker"]], "Introduction": [[9, "introduction"], [53, "introduction"]], "View available tags": [[9, "view-available-tags"]], "CUDA-enabled docker images": [[9, "cuda-enabled-docker-images"]], "CPU-only docker images": [[9, "cpu-only-docker-images"]], "Download a docker image (CUDA)": [[9, "download-a-docker-image-cuda"]], "Download a docker image (CPU)": [[9, "download-a-docker-image-cpu"]], "Run a docker image with GPU": [[9, "run-a-docker-image-with-gpu"]], "Run a docker image with CPU": [[9, "run-a-docker-image-with-cpu"]], "Run yesno within a docker container": [[9, "run-yesno-within-a-docker-container"]], "Update the code": [[9, "update-the-code"]], "Data preparation": [[9, "data-preparation"], [21, "data-preparation"], [31, "data-preparation"], [33, "data-preparation"], [36, "data-preparation"], [38, "data-preparation"], [39, "data-preparation"], [41, "data-preparation"], [42, "data-preparation"], [43, "data-preparation"], [44, "data-preparation"], [46, "data-preparation"], [47, "data-preparation"], [49, "data-preparation"], [55, "data-preparation"], [56, "data-preparation"], [57, "data-preparation"], [59, "data-preparation"], 
[60, "data-preparation"]], "Frequently Asked Questions (FAQs)": [[10, "frequently-asked-questions-faqs"]], "OSError: libtorch_hip.so: cannot open shared object file: no such file or directory": [[10, "oserror-libtorch-hip-so-cannot-open-shared-object-file-no-such-file-or-directory"]], "AttributeError: module \u2018distutils\u2019 has no attribute \u2018version\u2019": [[10, "attributeerror-module-distutils-has-no-attribute-version"]], "ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory": [[10, "importerror-libpython3-10-so-1-0-cannot-open-shared-object-file-no-such-file-or-directory"]], "For the more curious": [[11, "for-the-more-curious"], [12, "for-the-more-curious"], [13, "for-the-more-curious"], [15, "for-the-more-curious"], [16, "for-the-more-curious"]], "A quick look to the generated files": [[11, "a-quick-look-to-the-generated-files"]], "download": [[11, "download"]], "data": [[11, "data"]], "Environment setup": [[13, "environment-setup"]], "Create a virtual environment": [[13, "create-a-virtual-environment"]], "Install dependencies": [[13, "install-dependencies"]], "Install icefall": [[13, "install-icefall"]], "Icefall for dummies tutorial": [[14, "icefall-for-dummies-tutorial"]], "Model Export": [[15, "model-export"]], "Export the model parameters via model.state_dict()": [[15, "export-the-model-parameters-via-model-state-dict"]], "Export via torch.jit.script()": [[15, "export-via-torch-jit-script"]], "Export via torch.onnx.export()": [[15, "export-via-torch-onnx-export"]], "Huggingface": [[17, "huggingface"]], "Pre-trained models": [[18, "pre-trained-models"]], "Huggingface spaces": [[19, "huggingface-spaces"]], "YouTube Video": [[19, "youtube-video"], [21, "youtube-video"]], "Icefall": [[20, "icefall"]], "Contents:": [[20, null]], "Installation": [[21, "installation"]], "(0) Install CUDA toolkit and cuDNN": [[21, "install-cuda-toolkit-and-cudnn"]], "(1) Install torch and torchaudio": [[21, 
"install-torch-and-torchaudio"]], "(2) Install k2": [[21, "install-k2"]], "(3) Install lhotse": [[21, "install-lhotse"]], "(4) Download icefall": [[21, "download-icefall"]], "Installation example": [[21, "installation-example"]], "(1) Create a virtual environment": [[21, "create-a-virtual-environment"]], "(2) Install CUDA toolkit and cuDNN": [[21, "id1"]], "(3) Install torch and torchaudio": [[21, "id2"]], "(4) Install k2": [[21, "id3"]], "(5) Install lhotse": [[21, "id5"]], "(6) Download icefall": [[21, "id6"]], "Test Your Installation": [[21, "test-your-installation"]], "Export model.state_dict()": [[22, "export-model-state-dict"], [41, "export-model-state-dict"], [43, "export-model-state-dict"], [44, "export-model-state-dict"], [55, "export-model-state-dict"], [56, "export-model-state-dict"], [57, "export-model-state-dict"]], "When to use it": [[22, "when-to-use-it"], [28, "when-to-use-it"], [29, "when-to-use-it"]], "How to export": [[22, "how-to-export"], [28, "how-to-export"], [29, "how-to-export"]], "How to use the exported model": [[22, "how-to-use-the-exported-model"], [28, "how-to-use-the-exported-model"]], "Use the exported model to run decode.py": [[22, "use-the-exported-model-to-run-decode-py"]], "Export to ncnn": [[23, "export-to-ncnn"]], "Export ConvEmformer transducer models to ncnn": [[24, "export-convemformer-transducer-models-to-ncnn"]], "1. Download the pre-trained model": [[24, "download-the-pre-trained-model"], [25, "download-the-pre-trained-model"], [26, "download-the-pre-trained-model"]], "2. Install ncnn and pnnx": [[24, "install-ncnn-and-pnnx"], [25, "install-ncnn-and-pnnx"], [26, "install-ncnn-and-pnnx"]], "3. Export the model via torch.jit.trace()": [[24, "export-the-model-via-torch-jit-trace"], [25, "export-the-model-via-torch-jit-trace"], [26, "export-the-model-via-torch-jit-trace"]], "4. 
Export torchscript model via pnnx": [[24, "export-torchscript-model-via-pnnx"], [25, "export-torchscript-model-via-pnnx"], [26, "export-torchscript-model-via-pnnx"]], "5. Test the exported models in icefall": [[24, "test-the-exported-models-in-icefall"], [25, "test-the-exported-models-in-icefall"], [26, "test-the-exported-models-in-icefall"]], "6. Modify the exported encoder for sherpa-ncnn": [[24, "modify-the-exported-encoder-for-sherpa-ncnn"], [25, "modify-the-exported-encoder-for-sherpa-ncnn"], [26, "modify-the-exported-encoder-for-sherpa-ncnn"]], "7. (Optional) int8 quantization with sherpa-ncnn": [[24, "optional-int8-quantization-with-sherpa-ncnn"], [25, "optional-int8-quantization-with-sherpa-ncnn"]], "Export LSTM transducer models to ncnn": [[25, "export-lstm-transducer-models-to-ncnn"]], "Export streaming Zipformer transducer models to ncnn": [[26, "export-streaming-zipformer-transducer-models-to-ncnn"]], "Export to ONNX": [[27, "export-to-onnx"]], "sherpa-onnx": [[27, "sherpa-onnx"]], "Example": [[27, "example"]], "Download the pre-trained model": [[27, "download-the-pre-trained-model"], [33, "download-the-pre-trained-model"], [35, "download-the-pre-trained-model"], [36, "download-the-pre-trained-model"], [38, "download-the-pre-trained-model"], [42, "download-the-pre-trained-model"], [46, "download-the-pre-trained-model"], [47, "download-the-pre-trained-model"], [49, "download-the-pre-trained-model"]], "Export the model to ONNX": [[27, "export-the-model-to-onnx"]], "Decode sound files with exported ONNX models": [[27, "decode-sound-files-with-exported-onnx-models"]], "Export model with torch.jit.script()": [[28, "export-model-with-torch-jit-script"]], "Export model with torch.jit.trace()": [[29, "export-model-with-torch-jit-trace"]], "How to use the exported models": [[29, "how-to-use-the-exported-models"]], "Model export": [[30, "model-export"]], "Finetune from a supervised pre-trained Zipformer model": [[31, 
"finetune-from-a-supervised-pre-trained-zipformer-model"]], "Model preparation": [[31, "model-preparation"]], "Fine-tune": [[31, "fine-tune"]], "Fine-tune a pre-trained model": [[32, "fine-tune-a-pre-trained-model"]], "Table of Contents": [[32, null], [61, null]], "Conformer CTC": [[33, "conformer-ctc"], [38, "conformer-ctc"]], "Configurable options": [[33, "configurable-options"], [36, "configurable-options"], [38, "configurable-options"], [41, "configurable-options"], [43, "configurable-options"], [44, "configurable-options"], [55, "configurable-options"], [56, "configurable-options"], [57, "configurable-options"]], "Pre-configured options": [[33, "pre-configured-options"], [36, "pre-configured-options"], [38, "pre-configured-options"], [41, "pre-configured-options"], [43, "pre-configured-options"], [44, "pre-configured-options"], [55, "pre-configured-options"], [56, "pre-configured-options"], [57, "pre-configured-options"]], "Training logs": [[33, "training-logs"], [35, "training-logs"], [36, "training-logs"], [38, "training-logs"], [41, "training-logs"], [43, "training-logs"], [44, "training-logs"], [55, "training-logs"], [56, "training-logs"], [57, "training-logs"]], "Usage examples": [[33, "usage-examples"], [35, "usage-examples"], [36, "usage-examples"], [38, "usage-examples"]], "Case 1": [[33, "case-1"], [35, "case-1"], [36, "case-1"], [38, "case-1"]], "Case 2": [[33, "case-2"], [35, "case-2"], [36, "case-2"], [38, "case-2"]], "Case 3": [[33, "case-3"], [35, "case-3"], [38, "case-3"]], "Pre-trained Model": [[33, "pre-trained-model"], [35, "pre-trained-model"], [36, "pre-trained-model"], [38, "pre-trained-model"], [42, "pre-trained-model"], [46, "pre-trained-model"], [47, "pre-trained-model"], [49, "pre-trained-model"]], "Install kaldifeat": [[33, "install-kaldifeat"], [35, "install-kaldifeat"], [36, "install-kaldifeat"], [38, "install-kaldifeat"], [42, "install-kaldifeat"], [46, "install-kaldifeat"], [47, "install-kaldifeat"]], "Usage": [[33, "usage"], [35, 
"usage"], [36, "usage"], [38, "usage"]], "CTC decoding": [[33, "ctc-decoding"], [38, "ctc-decoding"], [38, "id2"]], "HLG decoding": [[33, "hlg-decoding"], [33, "id2"], [36, "hlg-decoding"], [38, "hlg-decoding"], [38, "id3"]], "HLG decoding + attention decoder rescoring": [[33, "hlg-decoding-attention-decoder-rescoring"]], "Colab notebook": [[33, "colab-notebook"], [35, "colab-notebook"], [36, "colab-notebook"], [38, "colab-notebook"], [42, "colab-notebook"], [46, "colab-notebook"], [47, "colab-notebook"], [49, "colab-notebook"]], "Deployment with C++": [[33, "deployment-with-c"], [38, "deployment-with-c"]], "aishell": [[34, "aishell"]], "Stateless Transducer": [[35, "stateless-transducer"]], "The Model": [[35, "the-model"]], "The Loss": [[35, "the-loss"]], "Todo": [[35, "id1"]], "Greedy search": [[35, "greedy-search"]], "Beam search": [[35, "beam-search"]], "Modified Beam search": [[35, "modified-beam-search"]], "TDNN-LSTM CTC": [[36, "tdnn-lstm-ctc"]], "Non Streaming ASR": [[37, "non-streaming-asr"]], "HLG decoding + LM rescoring": [[38, "hlg-decoding-lm-rescoring"]], "HLG decoding + LM rescoring + attention decoder rescoring": [[38, "hlg-decoding-lm-rescoring-attention-decoder-rescoring"]], "Compute WER with the pre-trained model": [[38, "compute-wer-with-the-pre-trained-model"]], "HLG decoding + n-gram LM rescoring": [[38, "hlg-decoding-n-gram-lm-rescoring"]], "HLG decoding + n-gram LM rescoring + attention decoder rescoring": [[38, "hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring"]], "Distillation with HuBERT": [[39, "distillation-with-hubert"]], "Codebook index preparation": [[39, "codebook-index-preparation"]], "LibriSpeech": [[40, "librispeech"], [54, "librispeech"]], "Pruned transducer statelessX": [[41, "pruned-transducer-statelessx"], [56, "pruned-transducer-statelessx"]], "Usage example": [[41, "usage-example"], [43, "usage-example"], [44, "usage-example"], [55, "usage-example"], [56, "usage-example"], [57, "usage-example"]], "Export Model": 
[[41, "export-model"], [56, "export-model"], [57, "export-model"]], "Export model using torch.jit.script()": [[41, "export-model-using-torch-jit-script"], [43, "export-model-using-torch-jit-script"], [44, "export-model-using-torch-jit-script"], [56, "export-model-using-torch-jit-script"], [57, "export-model-using-torch-jit-script"]], "Download pretrained models": [[41, "download-pretrained-models"], [43, "download-pretrained-models"], [44, "download-pretrained-models"], [55, "download-pretrained-models"], [56, "download-pretrained-models"], [57, "download-pretrained-models"], [59, "download-pretrained-models"], [60, "download-pretrained-models"]], "Deploy with Sherpa": [[41, "deploy-with-sherpa"], [56, "deploy-with-sherpa"], [57, "deploy-with-sherpa"]], "TDNN-LSTM-CTC": [[42, "tdnn-lstm-ctc"], [47, "tdnn-lstm-ctc"]], "Inference with a pre-trained model": [[42, "inference-with-a-pre-trained-model"], [46, "inference-with-a-pre-trained-model"], [47, "inference-with-a-pre-trained-model"], [49, "inference-with-a-pre-trained-model"]], "Zipformer CTC Blank Skip": [[43, "zipformer-ctc-blank-skip"]], "Export models": [[43, "export-models"], [44, "export-models"], [55, "export-models"], [59, "export-models"], [60, "export-models"]], "Zipformer MMI": [[44, "zipformer-mmi"]], "TIMIT": [[45, "timit"]], "TDNN-LiGRU-CTC": [[46, "tdnn-ligru-ctc"]], "YesNo": [[48, "yesno"]], "TDNN-CTC": [[49, "tdnn-ctc"]], "Download kaldifeat": [[49, "download-kaldifeat"]], "RNN-LM": [[50, "rnn-lm"]], "Train an RNN language model": [[51, "train-an-rnn-language-model"]], "Streaming ASR": [[52, "streaming-asr"]], "Streaming Conformer": [[53, "streaming-conformer"]], "Streaming Emformer": [[53, "streaming-emformer"]], "LSTM Transducer": [[55, "lstm-transducer"]], "Which model to use": [[55, "which-model-to-use"]], "Export model using torch.jit.trace()": [[55, "export-model-using-torch-jit-trace"], [57, "export-model-using-torch-jit-trace"]], "Simulate streaming decoding": [[56, 
"simulate-streaming-decoding"], [57, "simulate-streaming-decoding"]], "Real streaming decoding": [[56, "real-streaming-decoding"], [57, "real-streaming-decoding"]], "Zipformer Transducer": [[57, "zipformer-transducer"]], "TTS": [[58, "tts"]], "VITS-LJSpeech": [[59, "vits-ljspeech"]], "Build Monotonic Alignment Search": [[59, "build-monotonic-alignment-search"], [60, "build-monotonic-alignment-search"]], "Inference": [[59, "inference"], [60, "inference"]], "VITS-VCTK": [[60, "vits-vctk"]], "Recipes": [[61, "recipes"]]}, "indexentries": {}})
\ No newline at end of file