Skip to content

Commit

Permalink
use shuffled LibriSpeech cuts instead
Browse files Browse the repository at this point in the history
  • Loading branch information
JinZr committed Jan 5, 2024
1 parent f42258c commit 9c29aaa
Show file tree
Hide file tree
Showing 8 changed files with 26 additions and 24 deletions.
6 changes: 3 additions & 3 deletions egs/librispeech/ASR/conformer_ctc3/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,10 +952,10 @@ def run(rank, world_size, args):

     librispeech = LibriSpeechAsrDataModule(args)

-    train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()

     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds
Expand Down
7 changes: 4 additions & 3 deletions egs/librispeech/ASR/conformer_mmi/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,10 +771,11 @@ def run(rank, world_size, args):
valid_ali = None

     librispeech = LibriSpeechAsrDataModule(args)
-    train_cuts = librispeech.train_clean_100_cuts()
+
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()

     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds
Expand Down
6 changes: 3 additions & 3 deletions egs/librispeech/ASR/lstm_transducer_stateless3/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -989,10 +989,10 @@ def run(rank, world_size, args):

     librispeech = LibriSpeechAsrDataModule(args)

-    train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()

     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds
Expand Down
6 changes: 3 additions & 3 deletions egs/librispeech/ASR/pruned2_knowledge/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,10 +817,10 @@ def run(rank, world_size, args):

     librispeech = LibriSpeechAsrDataModule(args)

-    train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()

     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1038,13 +1038,17 @@ def run(rank, world_size, args):

     librispeech = LibriSpeechAsrDataModule(args)

+    assert not (
+        params.mini_libri and params.full_libri
+    ), f"Cannot set both mini-libri and full-libri flags to True, now mini-libri {params.mini_libri} and full-libri {params.full_libri}"
+
     if params.mini_libri:
         train_cuts = librispeech.train_clean_5_cuts()
     else:
-        train_cuts = librispeech.train_clean_100_cuts()
         if params.full_libri:
-            train_cuts += librispeech.train_clean_360_cuts()
-            train_cuts += librispeech.train_other_500_cuts()
+            train_cuts = librispeech.train_all_shuf_cuts()
+        else:
+            train_cuts = librispeech.train_clean_100_cuts()

     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1150,10 +1150,10 @@ def run(rank, world_size, args):

     librispeech = LibriSpeech(manifest_dir=args.manifest_dir)

-    train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()

     train_cuts = filter_short_and_long_utterances(train_cuts, sp)

Expand Down
6 changes: 3 additions & 3 deletions egs/librispeech/ASR/zipformer/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,10 +1174,10 @@ def run(rank, world_size, args):

     librispeech = LibriSpeechAsrDataModule(args)

-    train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()

     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds
Expand Down
3 changes: 0 additions & 3 deletions egs/librispeech/ASR/zipformer_mmi/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,10 +990,7 @@ def run(rank, world_size, args):

     librispeech = LibriSpeechAsrDataModule(args)

-    # train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        # train_cuts += librispeech.train_clean_360_cuts()
-        # train_cuts += librispeech.train_other_500_cuts()
         train_cuts = librispeech.train_all_shuf_cuts()
     else:
         train_cuts = librispeech.train_clean_100_cuts()
Expand Down

0 comments on commit 9c29aaa

Please sign in to comment.