#!/usr/bin/env bash
. ./cmd.sh
. ./path.sh
set -e
mfccdir=`pwd`/mfcc
vaddir=`pwd`/mfcc
# cnceleb trials
cnceleb_root=data/CN-Celeb
eval_trials_core=data/eval_test/trials/trials.lst
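# The trials file is expected to follow the usual Kaldi convention, one trial
# per line:
#   <enroll-spk-id> <test-utt-id> target|nontarget
# (the first two fields are what the scoring stage below cuts out; the third
# is the label consumed by compute-eer).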
nnet_dir=exp/xvector_nnet_cnceleb
stage=0
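# To re-run from a later point, edit this value; stages with a number below
# it are then skipped.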
if [ $stage -le 0 ]; then
  # Prepare the CN-Celeb dataset. This script prepares both the development
  # (training) data and the evaluation (enroll/test) data.
  local/make_cnceleb.sh $cnceleb_root data
fi
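# After stage 0 we assume the standard Kaldi data directories exist:
# data/train, data/eval_enroll and data/eval_test, each with wav.scp,
# utt2spk and spk2utt; the stages below rely on them.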
if [ $stage -le 1 ]; then
  # Make MFCCs and compute the energy-based VAD for each dataset.
  for name in train eval_enroll eval_test; do
    steps/make_mfcc.sh --write-utt2num-frames true --mfcc-config conf/mfcc.conf --nj 8 --cmd "$train_cmd" \
      data/${name} exp/make_mfcc $mfccdir
    utils/fix_data_dir.sh data/${name}
    sid/compute_vad_decision.sh --nj 8 --cmd "$train_cmd" \
      data/${name} exp/make_vad $vaddir
    utils/fix_data_dir.sh data/${name}
  done
fi
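# compute_vad_decision.sh writes a vad.scp with per-frame 0/1 speech decisions
# based on frame energy; prepare_feats_for_egs.sh below uses it to drop the
# nonspeech frames.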
# Now we prepare the features to generate examples for xvector training.
if [ $stage -le 2 ]; then
  # This script applies CMVN and removes nonspeech frames. Note that this is
  # somewhat wasteful, as it roughly doubles the amount of training data on
  # disk. After creating the training examples, this data can be removed.
  local/nnet3/xvector/prepare_feats_for_egs.sh --nj 8 --cmd "$train_cmd" \
    data/train data/train_no_sil exp/train_no_sil
  utils/fix_data_dir.sh data/train_no_sil

  # Now we need to remove features that are too short after removing silence
  # frames. We want at least 5s (500 frames) per utterance.
  min_len=500
  mv data/train_no_sil/utt2num_frames data/train_no_sil/utt2num_frames.bak
  awk -v min_len=${min_len} '$2 > min_len {print $1, $2}' data/train_no_sil/utt2num_frames.bak > data/train_no_sil/utt2num_frames
  utils/filter_scp.pl data/train_no_sil/utt2num_frames data/train_no_sil/utt2spk > data/train_no_sil/utt2spk.new
  mv data/train_no_sil/utt2spk.new data/train_no_sil/utt2spk
  utils/fix_data_dir.sh data/train_no_sil

  # We also want several utterances per speaker. Now we'll throw out speakers
  # with fewer than 5 utterances.
  min_num_utts=5
  awk '{print $1, NF-1}' data/train_no_sil/spk2utt > data/train_no_sil/spk2num
  awk -v min_num_utts=${min_num_utts} '$2 >= min_num_utts {print $1, $2}' data/train_no_sil/spk2num | utils/filter_scp.pl - data/train_no_sil/spk2utt > data/train_no_sil/spk2utt.new
  mv data/train_no_sil/spk2utt.new data/train_no_sil/spk2utt
  utils/spk2utt_to_utt2spk.pl data/train_no_sil/spk2utt > data/train_no_sil/utt2spk
  utils/filter_scp.pl data/train_no_sil/utt2spk data/train_no_sil/utt2num_frames > data/train_no_sil/utt2num_frames.new
  mv data/train_no_sil/utt2num_frames.new data/train_no_sil/utt2num_frames

  # Now we're ready to create training examples.
  utils/fix_data_dir.sh data/train_no_sil
fi
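# A toy illustration of the filtering above (hypothetical IDs): at the usual
# 10 ms frame shift, 500 frames is about 5 seconds, so
#   utt2num_frames.bak:  spkA-utt1 612   -> kept
#                        spkA-utt2 430   -> dropped (too short)
# and any speaker left with fewer than min_num_utts utterances is then
# removed entirely, along with all of that speaker's utterances.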
local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage -1 \
  --data data/train_no_sil --nnet-dir $nnet_dir \
  --egs-dir $nnet_dir/egs
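# In the standard Kaldi recipes, run_xvector.sh builds the training example
# archives from the --data directory, writes the nnet3 xvector network config
# and trains the network; it manages its own internal stages via the --stage
# option passed above.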
if [ $stage -le 3 ]; then
  # Note that over one-third of the utterances in our training set are shorter
  # than 2 seconds, and such short utterances are harmful for PLDA training.
  # Therefore, to improve PLDA modeling and inference, we combine short
  # utterances from the same speaker into segments longer than 5 seconds.
  utils/data/combine_short_segments_new.sh --speaker-only true \
    data/train 5 data/train_comb
  # Compute the energy-based VAD for train_comb.
  sid/compute_vad_decision.sh --nj 8 --cmd "$train_cmd" \
    data/train_comb exp/make_vad $vaddir
  utils/fix_data_dir.sh data/train_comb
fi
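# For example, with --speaker-only true, two ~3 s utterances from the same
# speaker may be concatenated into one ~6 s segment, and utterances from
# different speakers are never combined. (A sketch of the intended behavior;
# the exact pairing is up to combine_short_segments_new.sh.)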
if [ $stage -le 4 ]; then
  # Extract x-vectors for the combined CN-Celeb training data. We'll use this
  # for things like LDA or PLDA.
  sid/nnet3/xvector/extract_xvectors.sh --cmd "$train_cmd --mem 12G" --nj 8 \
    $nnet_dir data/train_comb \
    exp/xvectors_train_comb
  # The enroll and test data.
  for name in eval_enroll eval_test; do
    sid/nnet3/xvector/extract_xvectors.sh --cmd "$train_cmd --mem 6G" --nj 8 \
      $nnet_dir data/$name \
      exp/xvectors_$name
  done
fi
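# extract_xvectors.sh leaves behind exp/xvectors_*/xvector.scp (and, for the
# enroll set, num_utts.ark with the utterance count per speaker); both are
# consumed by the backend and scoring stages below.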
if [ $stage -le 5 ]; then
  # Compute the mean vector for centering the evaluation xvectors.
  $train_cmd exp/xvectors_train_comb/log/compute_mean.log \
    ivector-mean scp:exp/xvectors_train_comb/xvector.scp \
    exp/xvectors_train_comb/mean.vec || exit 1;

  # This script uses LDA to decrease the dimensionality prior to PLDA.
  lda_dim=150
  $train_cmd exp/xvectors_train_comb/log/lda.log \
    ivector-compute-lda --total-covariance-factor=0.0 --dim=$lda_dim \
    "ark:ivector-subtract-global-mean scp:exp/xvectors_train_comb/xvector.scp ark:- |" \
    ark:data/train_comb/utt2spk exp/xvectors_train_comb/transform.mat || exit 1;

  # Train the PLDA model on the combined training data.
  $train_cmd exp/xvectors_train_comb/log/plda.log \
    ivector-compute-plda ark:data/train_comb/spk2utt \
    "ark:ivector-subtract-global-mean scp:exp/xvectors_train_comb/xvector.scp ark:- | transform-vec exp/xvectors_train_comb/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    exp/xvectors_train_comb/plda || exit 1;
fi
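# Note the preprocessing chain applied to every x-vector at scoring time
# below: subtract the global mean (mean.vec), apply the LDA transform
# (transform.mat), then length-normalize. This mirrors how the vectors fed
# into PLDA training were prepared above.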
if [ $stage -le 6 ]; then
  # Compute PLDA scores for the CN-Celeb eval core trials.
  $train_cmd exp/scores/log/cnceleb_eval_scoring.log \
    ivector-plda-scoring --normalize-length=true \
    --num-utts=ark:exp/xvectors_eval_enroll/num_utts.ark \
    "ivector-copy-plda --smoothing=0.0 exp/xvectors_train_comb/plda - |" \
    "ark:ivector-mean ark:data/eval_enroll/spk2utt scp:exp/xvectors_eval_enroll/xvector.scp ark:- | ivector-subtract-global-mean exp/xvectors_train_comb/mean.vec ark:- ark:- | transform-vec exp/xvectors_train_comb/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    "ark:ivector-subtract-global-mean exp/xvectors_train_comb/mean.vec scp:exp/xvectors_eval_test/xvector.scp ark:- | transform-vec exp/xvectors_train_comb/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    "cat '$eval_trials_core' | cut -d\ --fields=1,2 |" exp/scores/cnceleb_eval_scores || exit 1;

  # CN-Celeb Eval Core:
  # EER:
  # minDCF(p-target=0.01):
  # minDCF(p-target=0.001):
  echo -e "\nCN-Celeb Eval Core:";
  eer=$(paste $eval_trials_core exp/scores/cnceleb_eval_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
  mindcf1=`sid/compute_min_dcf.py --p-target 0.01 exp/scores/cnceleb_eval_scores $eval_trials_core 2>/dev/null`
  mindcf2=`sid/compute_min_dcf.py --p-target 0.001 exp/scores/cnceleb_eval_scores $eval_trials_core 2>/dev/null`
  echo "EER: $eer%"
  echo "minDCF(p-target=0.01): $mindcf1"
  echo "minDCF(p-target=0.001): $mindcf2"
fi