-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtranscribe-hmmdnn-basic.sh
122 lines (108 loc) · 4.36 KB
/
transcribe-hmmdnn-basic.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/bin/bash
# Basic decoding with an HMM/DNN ASR system.
#
# Usage: <this script> <datadir> <outputdir>
#
# <datadir> is a Kaldi data directory produced with NeMo and the connected
# scripts. The script runs in stages; `stage` selects the first one to run:
#   0: regenerate <datadir>/spk2utt from utt2spk and create the decode directory
#   1: decode with local/chain/decode.sh (scoring is skipped)
#   2: take the best path of every lattice job and merge the results into
#      <outputdir>/<lmwt>_<wip>.txt, symlinked as <outputdir>/transcripts.txt
# The original goal was a CTM file with timestamps for each output word; that
# section is currently commented out further down, so only plain transcripts
# are written.
#
# Hyperparameter file handed to local/chain/decode.sh; its basename (without
# .yaml) also becomes part of the decode directory name.
hparams="hyperparams/chain/CRDNN-AA-contd.yaml"
# Passed to decode.sh as --tree.
treedir="exp/chain/tree"
# Decoding graph directory; stage 2 reads words.txt from it, and its basename
# becomes part of the decode directory name.
graphdir="exp/chain/graph/graph_bpe.5000.varikn"
# Given to lattice-scale as --inv-acoustic-scale in stage 2; also part of the
# output file names.
lmwt=13
# Word insertion penalty given to lattice-add-penalty in stage 2; also part of
# the output file names.
wip=0.5
# frame_shift and print_silence are only referenced by the commented-out CTM
# section (nbest-to-ctm options).
frame_shift=0.03
print_silence=false
# Last command of the stage 2 pipeline; filters the hypothesis text.
hyp_filtering_cmd="local/wer_hyp_filter"
# Job launcher prefix used for the stage 2 array job (JOB=1:$nj).
ctm_cmd="slurm.pl --mem 2G --time 0:30:0"
# Passed to decode.sh as --nj in stage 1. Stage 2 overwrites it with the
# contents of <decode_dir>/num_jobs.
nj=128 # Number of jobs
# First stage to run; stages with a lower number are skipped.
stage=0
# NOTE(review): presumably the usual Kaldi environment setup and
# utils/parse_options.sh, which would let "--<name> <value>" on the command
# line override the defaults above - confirm. Both are sourced before
# `set -eu` is enabled below.
. path.sh
. parse_options.sh
# From here on: abort on any failing command and on use of unset variables.
set -eu

# Exactly two positional arguments are expected; anything else prints the
# usage line and stops.
[ "$#" -eq 2 ] || {
  echo "Usage: $0 <datadir> <outputdir>"
  exit 1
}
data=$1
outputdir=$2

# The decode directory is named after the hyperparameter file (minus .yaml)
# and the graph directory, so different configurations do not collide.
hparams_tag=$(basename "$hparams" .yaml)
graph_tag=$(basename "$graphdir")
decode_dir="$outputdir/decode_${hparams_tag}_${graph_tag}"
# Stage 0: prepare the data directory and the decode directory.
if [ "$stage" -le 0 ]; then
  # utils/fix_data_dir.sh is intentionally not run: the utterance ids of the
  # incoming datadir may not sort the way Kaldi wants, which does not really
  # matter for these decoding runs.
  #utils/fix_data_dir.sh "$data"
  utils/utt2spk_to_spk2utt.pl "$data/utt2spk" > "$data/spk2utt"
  mkdir -p "$decode_dir"
fi
# Stage 1: decoding.
if [ "$stage" -le 1 ]; then
  # Decoding should need less than 4 hours (times the number of jobs).
  # The acoustic model, however, processes everything sequentially on the GPU;
  # expect roughly 2 hours, but reserving 4 is the safe choice.
  decode_args=(
    --datadir "$data"
    --acwt 1.5
    --post-decode-acwt 15.0
    --hparams "$hparams"
    --graphdir "$graphdir"
    --tree "$treedir"
    --nj "$nj"
    --skip_scoring "true"
    --decode_cmd "slurm.pl --mem 12G --time 4:0:0"
    --am_cmd "srun --gres=gpu:v100:1 --time 4:0:0 --mem 32G --partition gpu --account project_2006368"
    --decodedir "$decode_dir"
  )
  local/chain/decode.sh "${decode_args[@]}"
fi
# This section gets the CTM
# However, it doesn't work with the current Kaldi Subword Lang directory
# (see aalto-speech/subword-kaldi note on GitHub)
#
#if [ $stage -le 2 ]; then
# # NOTE: this step copies steps/get_ctm_fast.sh
# # but just fixes some of the Kaldi assumptions (like having final.mdl in dir above decode dir)
# model="$treedir"/final.mdl
# nj=$(cat "$decode_dir"/num_jobs)
#
# if [ -f "$graphdir"/phones/word_boundary.int ]; then
# $ctm_cmd JOB=1:$nj $outputdir/log/get_ctm.JOB.log \
# set -o pipefail '&&' \
# lattice-1best --lm-scale=$lmwt --word-ins-penalty=$wip "ark:gunzip -c $decode_dir/lat.JOB.gz|" ark:- \| \
# lattice-align-words "$graphdir"/phones/word_boundary.int $model ark:- ark:- \| \
# nbest-to-ctm --frame-shift=$frame_shift --print-silence=$print_silence ark:- - \| \
# utils/int2sym.pl -f 5 "$graphdir"/words.txt \
# '>' "$outputdir"/ctm.JOB || exit 1;
# elif [ -f "$graphdir"/phones/align_lexicon.int ]; then
# $ctm_cmd JOB=1:$nj "$outputdir"/log/get_ctm.JOB.log \
# set -o pipefail '&&' \
# lattice-1best --lm-scale=$lmwt --word-ins-penalty=$wip "ark:gunzip -c $decode_dir/lat.JOB.gz|" ark:- \| \
# lattice-align-words-lexicon "$graphdir"/phones/align_lexicon.int $model ark:- ark:- \| \
# lattice-1best ark:- ark:- \| \
# nbest-to-ctm --frame-shift=$frame_shift --print-silence=$print_silence ark:- - \| \
# utils/int2sym.pl -f 5 "$graphdir"/words.txt \
# '>' "$outputdir"/ctm.JOB || exit 1;
# else
# echo "$0: neither "$graphdir"/phones/word_boundary.int nor "$graphdir"/phones/align_lexicon.int exists: cannot align."
# exit 1;
# fi
#
# for n in `seq $nj`; do
# cat "$outputdir"/ctm.$n
# rm "$outputdir"/ctm.$n
# done > "$outputdir"/ctm
#fi
# Stage 2: extract plain transcripts (used instead of the CTM section, which
# is commented out).

# Point <dir>/transcripts.txt at <dir>/<file>.
# Arguments: $1 - directory holding the merged transcript
#            $2 - file name of the merged transcript inside that directory
# The target is stored relative to the link's own directory, so the link
# resolves whether <dir> was given as a relative or an absolute path, and -f
# replaces a link left behind by an earlier run instead of failing.
link_transcripts() {
  local dir=$1 file=$2
  ln -sf -- "$file" "$dir"/transcripts.txt
}

if [ "$stage" -le 2 ]; then
  # This section copies the best-path part of steps/score_kaldi.sh.
  symtab="$graphdir"/words.txt
  # The number of lattice jobs is read back from the decode directory
  # (presumably written by the decode step - confirm), not taken from the
  # command line.
  nj=$(cat "$decode_dir"/num_jobs)
  out_base="${lmwt}_${wip}"

  # $ctm_cmd and $hyp_filtering_cmd are left unquoted on purpose: they hold a
  # command plus its options and must be split into words.
  # pipefail makes a job fail when any stage of its pipeline fails, not just
  # the final filter (same guard as in the commented-out CTM variant).
  $ctm_cmd JOB=1:"$nj" "$outputdir"/log/best_path_"$out_base".JOB.log \
    set -o pipefail '&&' \
    lattice-scale --inv-acoustic-scale="$lmwt" "ark:gunzip -c $decode_dir/lat.JOB.gz|" ark:- \| \
    lattice-add-penalty --word-ins-penalty="$wip" ark:- ark:- \| \
    lattice-best-path --word-symbol-table="$symtab" ark:- ark,t:- \| \
    utils/int2sym.pl -f 2- "$symtab" \| \
    $hyp_filtering_cmd '>' "$outputdir"/"$out_base".JOB.txt || exit 1

  # Merge the per-job outputs in job order.
  for n in $(seq "$nj"); do
    cat "$outputdir"/"$out_base"."$n".txt
  done > "$outputdir"/"$out_base".txt

  link_transcripts "$outputdir" "$out_base".txt

  # Remove the per-job pieces only after the merged file and link exist.
  for n in $(seq "$nj"); do
    rm "$outputdir"/"$out_base"."$n".txt
  done
fi