-
Notifications
You must be signed in to change notification settings - Fork 6
/
kaldi.sh
executable file
·58 lines (48 loc) · 1.98 KB
/
kaldi.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
# Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero)
# 2017 Johns Hopkins University (Author: Daniel Povey)
# 2017-2018 David Snyder
# 2018 Ewald Enzinger
# Apache 2.0.
#
# See ../README.txt for more info on data required.
# Results (mostly equal error-rates) are inline in comments below.
# Move to the recipe's root dir. The path is relative to the directory the
# script is launched from. Abort immediately if it cannot be entered: every
# relative path used below (cmd.sh, path.sh, local/, utils/, steps/, sid/,
# data/, exp/) is resolved against it.
cd ../kaldi/egs/voxceleb/v2 || {
  echo "$0: cannot cd to ../kaldi/egs/voxceleb/v2" >&2
  exit 1
}
# Recipe-local environment. $train_cmd, used by the steps below, is not set in
# this script, so it is presumably defined by cmd.sh -- confirm there.
. ./cmd.sh
. ./path.sh
# From here on, stop at the first command that fails.
set -e
# Output directory for MFCC features, and for VAD decisions (same directory).
mfccdir="$(pwd)/mfcc"
vaddir="$(pwd)/mfcc"
# The trials file is downloaded by local/make_voxceleb1.pl.
# NOTE(review): voxceleb1_trials, voxceleb1_root and musan_root are not
# referenced by any active command in the visible part of this script.
voxceleb1_trials=data/voxceleb1_test/trials
voxceleb1_root=/export/corpora/VoxCeleb1
# Root directory handed to local/make_voxceleb2.pl in stage 0.
# The tilde is left unquoted on purpose so that it expands to $HOME.
voxceleb2_root=~/projects/waifuchat/temp/utt
# Directory of the x-vector network; extracted x-vectors are written beneath it.
nnet_dir=exp/xvector_nnet_1a
musan_root=/export/corpora/JHU/musan
# Directory that receives the final text dump of the x-vectors.
results_dir=~/projects/waifuchat/temp
# Steps guarded by "$stage -le N" run only when stage <= N.
stage=0
# Stage 0: data preparation.
if [ "$stage" -le 0 ]; then
  # Disabled: preparation of the VoxCeleb2 "dev" portion.
  #local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train
  # Build data/voxceleb2_test from the "test" portion under $voxceleb2_root.
  local/make_voxceleb2.pl "$voxceleb2_root" test data/voxceleb2_test
  # Disabled: this script creates data/voxceleb1_test and data/voxceleb1_train.
  #local/make_voxceleb1.pl $voxceleb1_root data
  # data/train is assembled from data/voxceleb2_test only, because the other
  # preparation steps above are commented out. The figures previously quoted
  # here (7,351 speakers and 1,277,503 utterances, for all of VoxCeleb2 plus
  # the training portion of VoxCeleb1) therefore do not describe this set.
  utils/combine_data.sh data/train data/voxceleb2_test
fi
# Stage 1: feature extraction.
if [ "$stage" -le 1 ]; then
  # Make MFCCs and compute the energy-based VAD for each dataset.
  # Only "train" is processed; the loop is kept so more sets can be listed.
  for name in train; do
    steps/make_mfcc.sh --write-utt2num-frames true --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
      "data/${name}" exp/make_mfcc "$mfccdir"
    # Re-validate the data directory after each step that adds files to it.
    utils/fix_data_dir.sh "data/${name}"
    sid/compute_vad_decision.sh --nj 1 --cmd "$train_cmd" \
      "data/${name}" exp/make_vad "$vaddir"
    utils/fix_data_dir.sh "data/${name}"
  done
fi
# Stage 2: extract x-vectors for data/train with the network in $nnet_dir and
# dump them as text into $results_dir/xvectors.txt. Gated by $stage like the
# earlier steps; with stage=0 it always runs.
if [ "$stage" -le 2 ]; then
  sid/nnet3/xvector/extract_xvectors.sh --cmd "$train_cmd --mem 4G" --nj 1 \
    "$nnet_dir" data/train \
    "$nnet_dir/xvectors_train"
  # Make sure the destination exists before copy-vector tries to write to it.
  mkdir -p -- "$results_dir"
  # Convert the archive to text form (ark,t). Reading only xvector.1.ark
  # assumes the single-job run above (--nj 1) produced exactly one archive
  # -- TODO confirm against extract_xvectors.sh.
  ../../../src/bin/copy-vector "ark:${nnet_dir}/xvectors_train/xvector.1.ark" "ark,t:${results_dir}/xvectors.txt"
fi