forked from as-ideas/TransformerTTS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
60 lines (44 loc) · 1.61 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Standard-library imports first; neither depends on the path setup below.
import sys
from pathlib import Path

# Relative locations of the two checkouts this script works with.
MelGAN_path = 'melgan/'
TTS_path = 'TransformerTTS/'

# TTS_path must be on sys.path *before* the two imports that follow,
# otherwise `model.factory` and `data.audio` cannot be resolved.
sys.path.append(TTS_path)
from model.factory import tts_ljspeech  # noqa: E402
from data.audio import Audio  # noqa: E402

# Load the pretrained model; the factory returns the model together with the
# config that the Audio helper is constructed from.
model, config = tts_ljspeech()
audio = Audio(config)
import IPython.display as ipd

# The same sentence is synthesized three times: once with the model's default
# settings, then with two different speed_regulator values.
sentence = 'Scientists at the CERN laboratory, say they have discovered a new particle.'

# Default settings. `out_normal` is kept under its own name because the
# vocoder step later in this file reads it again.
out_normal = model.predict(sentence)
# Convert spectrogram to wav (with griffin lim); the mel output is transposed
# before being handed to the reconstruction helper.
mel_normal = out_normal['mel'].numpy().T
wav = audio.reconstruct_waveform(mel_normal)
ipd.display(ipd.Audio(wav, rate=config['sampling_rate']))

# speed_regulator=1.20 -> "20% faster", speed_regulator=.9 -> "10% slower"
for speed in (1.20, .9):
    out = model.predict(sentence, speed_regulator=speed)
    wav = audio.reconstruct_waveform(out['mel'].numpy().T)
    ipd.display(ipd.Audio(wav, rate=config['sampling_rate']))
# Do some sys cleaning: take the TransformerTTS checkout off the import path
# and drop its cached top-level `model` module before the MelGAN code is loaded.
# NOTE(review): presumably the MelGAN repo ships its own `model` package and
# would otherwise pick up the cached one -- confirm. Only the top-level
# 'model' entry is removed; submodule entries (e.g. 'model.factory') stay
# in sys.modules.
sys.path.remove(TTS_path)
sys.modules.pop('model')
sys.path.append(MelGAN_path)

import torch
import numpy as np

# Fetch the MelGAN vocoder through torch.hub and switch it to eval mode.
vocoder = torch.hub.load('seungwonpark/melgan', 'melgan')
vocoder.eval()

# Reuse the default-speed prediction: transpose the mel output and prepend a
# new leading axis before turning it into a torch tensor.
mel = torch.tensor(out_normal['mel'].numpy().T[np.newaxis, :, :])

# Move both the vocoder and its input to the GPU when one is available.
if torch.cuda.is_available():
    vocoder = vocoder.cuda()
    mel = mel.cuda()

# Inference only -- no gradients needed.
# The result is bound to `vocoder_audio` rather than `audio` so the Audio
# helper created earlier in this file is not overwritten (the synthesis steps
# above call `audio.reconstruct_waveform`).
with torch.no_grad():
    vocoder_audio = vocoder.inference(mel)

# Display audio
# NOTE(review): the rate is hard-coded to 22050 here instead of being read
# from config['sampling_rate'] -- confirm it matches the vocoder's output.
ipd.display(ipd.Audio(vocoder_audio.cpu().numpy(), rate=22050))