-
Notifications
You must be signed in to change notification settings - Fork 26
/
make_plp.sh
executable file
·143 lines (115 loc) · 4.09 KB
/
make_plp.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/bin/bash
# Copyright 2012-2016 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# To be run from .. (one directory up from here)
# see ../run.sh for example
# Begin configuration section.
nj=4
cmd=run.pl
plp_config=conf/plp.conf
compress=true
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# -lt 1 ] || [ $# -gt 3 ]; then
echo "Usage: $0 [options] <data-dir> [<log-dir> [<plp-dir>] ]";
echo "e.g.: $0 data/train exp/make_plp/train mfcc"
echo "Note: <log-dir> defaults to <data-dir>/log, and <plp-dir> defaults to <data-dir>/data"
echo "Options: "
echo " --plp-config <config-file> # config passed to compute-plp-feats "
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data=$1
if [ $# -ge 2 ]; then
logdir=$2
else
logdir=$data/log
fi
if [ $# -ge 3 ]; then
plpdir=$3
else
plpdir=$data/data
fi
# make $plpdir an absolute pathname.
plpdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $plpdir ${PWD}`
# use "name" as part of name of the archive.
name=`basename $data`
mkdir -p $plpdir || exit 1;
mkdir -p $logdir || exit 1;
if [ -f $data/feats.scp ]; then
mkdir -p $data/.backup
echo "$0: moving $data/feats.scp to $data/.backup"
mv $data/feats.scp $data/.backup
fi
scp=$data/wav.scp
required="$scp $plp_config"
for f in $required; do
if [ ! -f $f ]; then
echo "make_plp.sh: no such file $f"
exit 1;
fi
done
utils/validate_data_dir.sh --no-text --no-feats $data || exit 1;
if [ -f $data/spk2warp ]; then
echo "$0 [info]: using VTLN warp factors from $data/spk2warp"
vtln_opts="--vtln-map=ark:$data/spk2warp --utt2spk=ark:$data/utt2spk"
elif [ -f $data/utt2warp ]; then
echo "$0 [info]: using VTLN warp factors from $data/utt2warp"
vtln_opts="--vtln-map=ark:$data/utt2warp"
fi
for n in $(seq $nj); do
# the next command does nothing unless $plpdir/storage/ exists, see
# utils/create_data_link.pl for more info.
utils/create_data_link.pl $plpdir/raw_plp_$name.$n.ark
done
if [ -f $data/segments ]; then
echo "$0 [info]: segments file exists: using that."
split_segments=""
for n in $(seq $nj); do
split_segments="$split_segments $logdir/segments.$n"
done
utils/split_scp.pl $data/segments $split_segments || exit 1;
rm $logdir/.error 2>/dev/null
$cmd JOB=1:$nj $logdir/make_plp_${name}.JOB.log \
extract-segments scp,p:$scp $logdir/segments.JOB ark:- \| \
compute-plp-feats $vtln_opts --verbose=2 --config=$plp_config ark:- ark:- \| \
copy-feats --compress=$compress ark:- \
ark,scp:$plpdir/raw_plp_$name.JOB.ark,$plpdir/raw_plp_$name.JOB.scp \
|| exit 1;
else
echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance."
split_scps=""
for n in $(seq $nj); do
split_scps="$split_scps $logdir/wav_${name}.$n.scp"
done
utils/split_scp.pl $scp $split_scps || exit 1;
$cmd JOB=1:$nj $logdir/make_plp_${name}.JOB.log \
compute-plp-feats $vtln_opts --verbose=2 --config=$plp_config scp,p:$logdir/wav_${name}.JOB.scp ark:- \| \
copy-feats --compress=$compress ark:- \
ark,scp:$plpdir/raw_plp_$name.JOB.ark,$plpdir/raw_plp_$name.JOB.scp \
|| exit 1;
fi
if [ -f $logdir/.error.$name ]; then
echo "Error producing plp features for $name:"
tail $logdir/make_plp_${name}.1.log
exit 1;
fi
# concatenate the .scp files together.
for n in $(seq $nj); do
cat $plpdir/raw_plp_$name.$n.scp || exit 1;
done > $data/feats.scp
rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null
nf=`cat $data/feats.scp | wc -l`
nu=`cat $data/utt2spk | wc -l`
if [ $nf -ne $nu ]; then
echo "It seems not all of the feature files were successfully ($nf != $nu);"
echo "consider using utils/fix_data_dir.sh $data"
fi
if [ $nf -lt $[$nu - ($nu/20)] ]; then
echo "Less than 95% the features were successfully generated. Probably a serious error."
exit 1;
fi
echo "Succeeded creating PLP features for $name"