-
Notifications
You must be signed in to change notification settings - Fork 287
/
Copy pathcoqui_test.py
67 lines (56 loc) · 2.5 KB
/
coqui_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
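r"""
Demo: stream locally synthesized speech with RealtimeTTS' CoquiEngine and print timing markers.

Setup (the commands below are written for Windows):

1. Create and activate venv:
    python -m venv venv
    venv\Scripts\activate.bat
2. Install dependencies:
    pip install realtimetts[coqui]
3. Update CUDA and install deepspeed for faster processing
   (the deepspeed wheel below targets CPython 3.10, CUDA 12.1, 64-bit Windows):
    pip install torch==2.1.2+cu121 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121
    pip install https://github.com/daswer123/deepspeed-windows-wheels/releases/download/11.2/deepspeed-0.11.2+cuda121-cp310-cp310-win_amd64.whl
"""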
if __name__ == "__main__":
    import time

    from RealtimeTTS import TextToAudioStream, CoquiEngine

    def dummy_generator():
        """Yield the two demo sentences that are fed to the stream."""
        yield "Hey guys. These here are realtime spoken sentences based on local text synthesis. "
        yield "With a local, neuronal, cloned voice. So every spoken sentence sounds unique."

    def create_synthesis_callbacks(start_time):
        """Build the pair of per-sentence timing callbacks.

        Args:
            start_time: ``time.time()`` reference that the
                ``<SYNTHESIS_START>`` offset is measured from.

        Returns:
            A ``(before_sentence_callback, on_sentence_callback)`` tuple.
        """
        # Shared by both closures: when synthesis of the current sentence began.
        sentence_synth_start = None

        def before_sentence_callback(_):
            """Record the synthesis start and print its offset from ``start_time``."""
            nonlocal sentence_synth_start
            sentence_synth_start = time.time()
            elapsed = sentence_synth_start - start_time
            print("<SYNTHESIS_START>", f"{elapsed:.2f}s")

        def on_sentence_callback(_):
            """Print how long the sentence took since ``before_sentence_callback``."""
            if sentence_synth_start is not None:
                delta = time.time() - sentence_synth_start
                print("<SYNTHESIS_DONE>", f"Delta: {delta:.2f}s")
            else:
                print("<SYNTHESIS_DONE>", "No start time recorded.")

        return before_sentence_callback, on_sentence_callback

    # for normal use with minimal logging:
    engine = CoquiEngine(use_deepspeed=True)

    # test with extended logging:
    # import logging
    # logging.basicConfig(level=logging.INFO)
    # engine = CoquiEngine(level=logging.INFO)

    # None means "timing has not started yet"; set right before the real playback.
    start_time = None

    def on_audio_stream_start_callback():
        """Print the time from ``start_time`` until the audio stream starts."""
        # NOTE(review): the muted warm-up may also trigger this callback - confirm.
        # Without a reference time there is nothing meaningful to report, so skip
        # instead of printing an epoch-sized value.
        if start_time is None:
            return
        delta = time.time() - start_time
        print("<TTFT>", f"Time: {delta:.2f}s")

    try:
        stream = TextToAudioStream(engine, on_audio_stream_start=on_audio_stream_start_callback)

        stream.feed("warm up").play(muted=True)

        print("Starting to play stream")

        # Take the reference time BEFORE creating the callbacks: they capture the
        # value passed in, so creating them first would freeze a stale start time.
        start_time = time.time()
        before_sentence_callback, on_sentence_callback = create_synthesis_callbacks(start_time)

        stream.feed(dummy_generator()).play(
            log_synthesized_text=True,
            output_wavfile="output.wav",
            before_sentence_synthesized=before_sentence_callback,
            on_sentence_synthesized=on_sentence_callback,
        )

        print("Playout finished")
    finally:
        # Shut the engine down even when stream creation or playback raises.
        engine.shutdown()