-
Notifications
You must be signed in to change notification settings - Fork 6
/
extract_phn_posteriorgram.sh
executable file
·88 lines (60 loc) · 2.17 KB
/
extract_phn_posteriorgram.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/bash
# Extract phoneme posteriors for a single audio recording with Kaldi.
#
# Usage: extract_phn_posteriorgram.sh [--stage <n>] <audio_path> [<output_path>]
#   <audio_path>   Recording to process. Its file name without the final
#                  extension is used as the recording id (rec_id).
#   <output_path>  Directory that receives the results (default: "out").
#
# Variables of the configuration section can be overridden on the command
# line as "--name value"; this is handled by utils/parse_options.sh.

# Begin configuration section
nj=40        # NOTE(review): not referenced by the stages below (they pass --nj 1)
stage=1      # first stage to run; earlier stages are skipped
decode_nj=1  # NOTE(review): not referenced by the stages below
output_path=out
# End configuration section

. ./path.sh
. ./cmd.sh

set -e # exit on error

# Link the recipe's shared script directories from the WSJ example unless
# something with that name is already present in the working directory.
for link in steps utils; do
  if [[ ! -e "$link" && ! -L "$link" ]]; then
    ln -s "${KALDI_ROOT:?KALDI_ROOT is not set (expected from path.sh)}/egs/wsj/s5/$link"
  fi
done

# Consume "--name value" options first: the positional arguments are only
# at $1/$2 once the options have been shifted away.
. ./utils/parse_options.sh

if [[ $# -lt 1 ]]; then
  echo "Usage: $0 [--stage <n>] <audio_path> [<output_path>]" >&2
  exit 1
fi

audio_path=$1
# Keep the configured default when no output directory is given.
output_path=${2:-$output_path}

# Recording id: base name of the audio file without its final extension.
rec_name=$(basename -- "$audio_path")
rec_id=${rec_name%.*}
if [[ -z "$rec_id" ]]; then
  echo "$0: cannot derive a recording id from '$audio_path'" >&2
  exit 1
fi
echo "$rec_id"

echo; echo "===> START TIME : $(date +"%D_%T") ====="; echo
# Stage 1: build the Kaldi data directory data/<rec_id> for the recording.
if [[ "$stage" -le 1 ]]; then
  echo "DATA PREPARATION"
  # local/data_preparation_ALT.py is given the audio path and the target
  # directory; fix_data_dir.sh is then run on that directory.
  # Expansions are quoted so that audio paths containing spaces or glob
  # characters reach the tools as a single argument.
  mkdir -p "data/${rec_id}"
  python3 local/data_preparation_ALT.py "$audio_path" "data/${rec_id}"
  ./utils/fix_data_dir.sh "data/${rec_id}"
fi
# Stage 2 inputs/outputs: MFCC output directory and the i-vector extractor
# model directory.
mfccdir=mfcc
ivector_model=models/ivector/extractor

# Stage 2: MFCC features (using conf/mfcc_hires.conf) and CMVN statistics
# for data/<rec_id>, followed by online i-vector extraction into
# exp/ivector/ivectors_<rec_id>. All jobs run with --nj 1.
if [[ "$stage" -le 2 ]]; then
  echo "============================="
  echo "---- MFCC FEATURE EXTRACTION ----"
  echo "===== $(date +"%D_%T") ====="

  steps/make_mfcc.sh --cmd "$train_cmd" --nj 1 --mfcc-config conf/mfcc_hires.conf \
    "data/${rec_id}" "exp/make_mfcc/${rec_id}" "$mfccdir"
  steps/compute_cmvn_stats.sh "data/${rec_id}"
  utils/fix_data_dir.sh "data/${rec_id}"

  echo "I-VECTOR EXTRACTION on VAD data"
  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 1 \
    "data/${rec_id}" "$ivector_model" "exp/ivector/ivectors_${rec_id}"
fi
# Model directories passed to steps/chain/get_phone_post.sh.
tree_dir=models/tree
acoustic_model_dir=models/ctdnnsa_ivec
# Despite its name, lang_dir does not hold a language model but the
# pronunciation model; its files define the phoneme space.
lang_dir=models/lang

# Stage 3: compute phone posteriors into exp/phn_post_<rec_id> and reformat
# them into <output_path>/<rec_id>.
if [[ "$stage" -le 3 ]]; then
  # Refuse an empty output directory up front: it would otherwise resolve to
  # "/<rec_id>" and only fail (or write to the filesystem root) after decoding.
  result_dir="${output_path:?output_path must not be empty}/${rec_id}"

  steps/chain/get_phone_post.sh --remove-word-position-dependency true \
    --online-ivector-dir "exp/ivector/ivectors_${rec_id}" \
    "$tree_dir" "$acoustic_model_dir" "$lang_dir" "data/${rec_id}" "exp/phn_post_${rec_id}"

  mkdir -p "$result_dir"
  python3 local/reformat_phone_post.py "exp/phn_post_${rec_id}" "$result_dir"
fi
# Final banner with the end timestamp.
echo
echo "===== $(date +"%D_%T") ====="
echo "===== PROCESS ENDED ====="
echo
# Reaching this point means every stage succeeded (set -e aborts earlier on
# failure), so report success to the caller instead of a non-zero status.
exit 0