Skip to content

Commit

Permalink
Librispeech v2
Browse files Browse the repository at this point in the history
  • Loading branch information
caofrance committed Mar 20, 2015
1 parent 1266eda commit ef71e1f
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 11 deletions.
3 changes: 2 additions & 1 deletion corpora/LibriSpeech_train-clean-100/bin/copy_phones.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
#copy phones.txt from the raw LibriSpeech data directory if it doesn't already exist
cp -u /fhgfs/bootphon/data/raw_data/LibriSpeech/phones.txt /fhgfs/bootphon/scratch/xcao/github_abkhazia/abkhazia/corpora/LibriSpeech/data/
cd /fhgfs/bootphon/scratch/xcao/github_abkhazia/abkhazia/corpora/LibriSpeech_train-clean-100/data/
cp -f -u /fhgfs/bootphon/data/raw_data/LibriSpeech/phones.txt ./phones.txt
4 changes: 3 additions & 1 deletion corpora/LibriSpeech_train-clean-100/bin/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# paths - the paths and the version of LibriSpeech need to be changed
# create 'data' directory if it doesn't exist
raw_path = "/fhgfs/bootphon/data/raw_data/LibriSpeech/"
dict_path = "/fhgfs/bootphon/data/raw_data/cmu_combined/"
dict_path = "/fhgfs/bootphon/data/raw_data/CMU_dict/"
derived_path = "/fhgfs/bootphon/data/derived_data/LibriSpeech_abkhazia/data/"
github_path = "/fhgfs/bootphon/scratch/xcao/github_abkhazia/abkhazia/corpora/LibriSpeech_train-clean-100/data/"

Expand Down Expand Up @@ -138,6 +138,8 @@ def text(trs, wav):
outfile = open(output_file_text, "w")
outfile2 = open(output_corrupted_wavs, "w")
input_dir = os.path.join(derived_path, trs)
if not os.path.isdir(input_dir):
os.makedirs(input_dir)
input_dir_wav = os.path.join(derived_path, wav)
files = os.listdir(input_dir)
wav_list = os.listdir(input_dir_wav)
Expand Down
3 changes: 2 additions & 1 deletion corpora/LibriSpeech_train-clean-360/bin/copy_phones.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
#copy phones.txt from the raw LibriSpeech data directory if it doesn't already exist
cp -u /fhgfs/bootphon/data/raw_data/LibriSpeech/phones.txt /fhgfs/bootphon/scratch/xcao/github_abkhazia/abkhazia/corpora/LibriSpeech/data/
cd /fhgfs/bootphon/scratch/xcao/github_abkhazia/abkhazia/corpora/LibriSpeech_train-clean-360/data/
cp -f -u /fhgfs/bootphon/data/raw_data/LibriSpeech/phones.txt ./phones.txt
18 changes: 10 additions & 8 deletions corpora/LibriSpeech_train-clean-360/bin/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# paths - the paths and the version of LibriSpeech need to be changed
# create 'data' directory if it doesn't exist
raw_path = "/fhgfs/bootphon/data/raw_data/LibriSpeech/"
dict_path = "/fhgfs/bootphon/data/raw_data/cmu_combined/"
dict_path = "/fhgfs/bootphon/data/raw_data/CMU_dict/"
derived_path = "/fhgfs/bootphon/data/derived_data/LibriSpeech_abkhazia/data/"
github_path = "/fhgfs/bootphon/scratch/xcao/github_abkhazia/abkhazia/corpora/LibriSpeech_train-clean-360/data/"

Expand Down Expand Up @@ -138,6 +138,8 @@ def text(trs, wav):
outfile = open(output_file_text, "w")
outfile2 = open(output_corrupted_wavs, "w")
input_dir = os.path.join(derived_path, trs)
if not os.path.isdir(input_dir):
os.makedirs(input_dir)
input_dir_wav = os.path.join(derived_path, wav)
files = os.listdir(input_dir)
wav_list = os.listdir(input_dir_wav)
Expand Down Expand Up @@ -247,15 +249,15 @@ def copy_phones():


#Running the different steps
copy_flac ('train-clean-360','flac_train-clean-360')
convert_flac()
copy_trs ('train-clean-360','trs_train-clean-360')
convert_speaker_ID_wav ('wav_train-clean-360')
link_wavs()
segments_speakers('wav_train-clean-360')
#copy_flac ('train-clean-360','flac_train-clean-360')
#convert_flac()
#copy_trs ('train-clean-360','trs_train-clean-360')
#convert_speaker_ID_wav ('wav_train-clean-360')
#link_wavs()
#segments_speakers('wav_train-clean-360')
text('trs_train-clean-360', 'wav_train-clean-360')
lexicon('trs_train-clean-360')
copy_phones()
#copy_phones()



Expand Down
39 changes: 39 additions & 0 deletions corpora/LibriSpeech_train-clean-360/bin/phones.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
S s
UW u
T t
N n
K k
Y j
Z z
AO ɔ
AY aɪ
SH ʃ
W w
NG ŋ
EY e
B b
CH ʧ
OY ɔɪ
JH ʤ
D d
ZH ʒ
G g
UH ʊ
F f
V v
ER ɝ
AA ɑ
IH ɪ
M m
DH ð
L l
AH ʌ
P p
OW o
AW aʊ
HH h
AE æ
R r
TH θ
IY i
EH ɛ

0 comments on commit ef71e1f

Please sign in to comment.