-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathapp.py
154 lines (122 loc) · 4.67 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import sys
import os
import torch
import gradio as gr
import tempfile
kokoro_path = os.path.abspath('Kokoro-82M')
print(f"kokoro_path :{kokoro_path}")
if kokoro_path not in sys.path:
sys.path.insert(0, kokoro_path)
try:
from kokoro import generate
from models import build_model
except ImportError:
print("Kokoro not found. Please install the Kokoro-82M package.")
generate = None
# Add the 'src' directory to the Python path
src_path = os.path.abspath('src')
if src_path not in sys.path:
sys.path.insert(0, src_path)
from src.long_speech_generation import generate_long_text_optimized
from src.podcast_generation import generate_audio
from src.utils import read_file_content, process_long_text, generate_audio_enhanced
# Import the Tab UI components
from podcast_tab import create_podcast_tab
from text_to_speech_tab import create_text_to_speech_tab
VOICEPACK_DIR = os.path.join(kokoro_path, "voices")
MODELS_LIST = {
"v0_19-full-fp32": os.path.join(kokoro_path, "kokoro-v0_19.pth"),
"v0_19-half-fp16": os.path.join(kokoro_path, "fp16/kokoro-v0_19-half.pth"),
}
# Available voices
CHOICES = {
'🇺🇸 🚺 American Female ⭐': 'af',
'🇺🇸 🚺 Bella ⭐': 'af_bella',
'🇺🇸 🚺 Sarah ⭐': 'af_sarah',
'🇺🇸 🚹 Michael ⭐': 'am_michael',
'🇺🇸 🚹 nicole': 'af_nicole',
'🇺🇸 🚹 sky': 'af_sky',
'🇺🇸 🚹 Adam': 'am_adam',
'🇬🇧 🚺 British Female emma': 'bf_emma',
'🇬🇧 🚺 British Female isabella': 'bf_isabella',
'🇬🇧 🚹 British Male george': 'bm_george',
'🇬🇧 🚹 British Male lewis': 'bm_lewis',
}
# Device Selection
device_options = ["auto", "cpu", "cuda"]
# Initialize model and voices (lazy loading)
MODEL_NAME = None
MODEL = None
MODEL_DEVICE = None
VOICES = {}
# Text normalization functions (simplified)
def normalize_text(text):
text = text.replace("’", "'")
text = re.sub(r'\s+', ' ', text).strip()
return text
SAMPLE_RATE = 24000
def load_model_and_voice(selected_device, model_path, voice):
global MODEL, VOICES, MODELS_LIST, MODEL_NAME, MODEL_DEVICE
try:
if selected_device == "auto":
device = 'cuda' if torch.cuda.is_available() else 'cpu'
elif selected_device == "cuda":
if torch.cuda.is_available():
print("CUDA is available. Using GPU.")
device = 'cuda'
else:
print("CUDA is not available. Using CPU instead.")
device = 'cpu'
else:
device = 'cpu'
except Exception as e:
print("CUDA Error is not available. Using CPU instead.")
device = 'cpu'
# Check if we need to reload the model
should_reload = (
MODEL is None or
MODEL_DEVICE != device or
MODEL_NAME != model_path
)
if should_reload:
MODEL = build_model(model_path, device)
MODEL_NAME = model_path
MODEL_DEVICE = device
print(f"Loaded model {model_path} on {device}")
if voice not in VOICES:
VOICES[voice] = torch.load(os.path.join(VOICEPACK_DIR, f'{voice}.pt'), map_location=device)
print(f'Loaded voice: {voice} on {device}')
return MODEL, VOICES[voice]
def update_input_visibility(choice):
return {
text_input: gr.update(visible=choice == "Direct Text"),
file_input: gr.update(visible=choice == "File Upload")
}
# Gradio Interface
with gr.Blocks() as app:
gr.Markdown("""## Local11labs Text-to-Speech Webui
This is a simple web interface for the Kokoro-82M text-to-speech model.""")
# Text-to-Speech Tab
(input_type, text_input, file_input, model_dropdown, voice_dropdown,
speed_slider, device_dropdown, process_type, generate_button,
audio_output, text_output, status_output) = create_text_to_speech_tab(
models_list=MODELS_LIST,
choices=CHOICES,
device_options=device_options,
update_input_visibility=update_input_visibility,
load_model_and_voice=load_model_and_voice,
)
# Podcast Tab
(generate_podcast_script_button, send_to_audio_input_button, podcast_script_json_output,
podcast_dialogue_status_output, podcast_script_json_input, podcast_host_voice_assignment_inputs,
podcast_model_dropdown, podcast_speed_slider, podcast_device_dropdown, generate_podcast_audio_button,
podcast_audio_output, podcast_audio_status_output) = create_podcast_tab(
models_list=MODELS_LIST,
choices=CHOICES,
device_options=device_options,
kokoro_path=kokoro_path,
load_model_and_voice=load_model_and_voice,
)
# Run the app
if __name__ == "__main__":
app.launch(share=True, debug=True)