Skip to content

Commit

Permalink
Use a larger padding value for multilingual whisper models
Browse files: browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Dec 7, 2023
1 parent 81f502a commit 191407a
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion scripts/whisper/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def compute_features(filename: str) -> torch.Tensor:
# You can use another value instead of 50.
mel = torch.nn.functional.pad(mel, (0, 0, 0, 50), "constant", 0)
# Note that if it throws for a multilingual model,
- # please use a larger value, say 200
+ # please use a larger value, say 300

target = 3000
if mel.shape[0] > target:
Expand Down
4 changes: 2 additions & 2 deletions sherpa-onnx/csrc/offline-recognizer-whisper-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
// tail_padding_frames so that whisper is able to detect the eot token.
int32_t tail_padding_frames = 50;
if (model_->IsMultiLingual()) {
- // 200 is an experience value. If it throws, please use a larger value.
- tail_padding_frames = 200;
+ // 300 is an experience value. If it throws, please use a larger value.
+ tail_padding_frames = 300;
}

int32_t actual_frames =
Expand Down

0 comments on commit 191407a

Please sign in to comment.