Update chat_ai.py
chat_ai.py  +10 -8
CHANGED
@@ -60,7 +60,6 @@ def load_models():
     return vocoder, F5TTS_ema_model, whisper_processor, whisper_model, device

 # Load the models only once and store them in global variables inside the function
-# This is achieved using function attributes
 def get_models():
     if not hasattr(get_models, "vocoder"):
         get_models.vocoder, get_models.F5TTS_ema_model, get_models.whisper_processor, get_models.whisper_model, get_models.device = load_models()
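For context, the hunk above relies on a lazy-loading idiom: the expensive load_models() call runs only on the first invocation, and its results are cached as attributes on the get_models function object. A minimal, self-contained sketch of that idea, with placeholder values standing in for the real models (whether the real get_models returns the tuple is not shown in this hunk, so the return below is illustrative):

def load_models():
    # Placeholder for the real, expensive model loading in chat_ai.py.
    print("loading models ...")
    return "vocoder", "F5TTS_ema_model", "whisper_processor", "whisper_model", "cpu"

def get_models():
    # Load once, then reuse the attributes cached on the function object.
    if not hasattr(get_models, "vocoder"):
        (get_models.vocoder,
         get_models.F5TTS_ema_model,
         get_models.whisper_processor,
         get_models.whisper_model,
         get_models.device) = load_models()
    return (get_models.vocoder, get_models.F5TTS_ema_model,
            get_models.whisper_processor, get_models.whisper_model,
            get_models.device)

get_models()  # first call: prints "loading models ..."
get_models()  # second call: reuses the cached attributes, nothing is reloaded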
@@ -78,20 +77,23 @@ def infer(
         # Preprocess the reference audio and the reference text
         ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text)

+        # Move only ref_audio to the device
+        ref_audio = ref_audio.to(device)
+
         # Make sure the text to generate is correctly formatted
         if not gen_text.startswith(" "):
             gen_text = " " + gen_text
         if not gen_text.endswith(". "):
             gen_text += ". "
-
+
         # The text entered by the user is used directly, without modifications
         input_text = gen_text
-
+
         print(f"Texto para generar audio: {input_text}")  # Debug: check the text
-
+
         # Run inference to generate the audio
         final_wave, final_sample_rate, combined_spectrogram = infer_process(
-            ref_audio
+            ref_audio,
             ref_text,
             input_text,
             F5TTS_ema_model,
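The key change in this hunk is the new ref_audio = ref_audio.to(device) line. Note that .to(device) assumes preprocess_ref_audio_text returns a torch.Tensor, or at least an object exposing .to; if it returned, say, a file path, the call would raise AttributeError. A hypothetical, more defensive variant of that step (not part of chat_ai.py) could guard on the type:

import torch

def move_ref_audio_to_device(ref_audio, device):
    # Hypothetical helper, not part of chat_ai.py: only call .to(device)
    # when ref_audio is actually a tensor; leave other types (e.g. a file
    # path or a NumPy array) untouched.
    if isinstance(ref_audio, torch.Tensor):
        return ref_audio.to(device)
    return ref_audio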
@@ -100,7 +102,7 @@ def infer(
             speed=speed,
             progress=gr.Progress(),
         )
-
+
         # Remove silences if enabled
         if remove_silence:
             with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
@@ -108,12 +110,12 @@ def infer(
                 remove_silence_for_generated_wav(f.name)
                 final_wave, _ = torchaudio.load(f.name)
             final_wave = final_wave.squeeze().cpu().numpy()
-
+
         # Save the spectrogram (optional)
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_spectrogram:
             spectrogram_path = tmp_spectrogram.name
             save_spectrogram(combined_spectrogram, spectrogram_path)
-
+
         return (final_sample_rate, final_wave), spectrogram_path
     except Exception as e:
         # Log the error for debugging
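The remove_silence branch above round-trips the generated audio through a temporary WAV file: the wave is written to disk, silence is stripped in place, and the file is reloaded with torchaudio before being converted back to a NumPy array. A hedged sketch of that pattern follows; the write step is assumed to use soundfile, since the diff does not show how the temporary file is populated, and the transform argument stands in for remove_silence_for_generated_wav:

import tempfile

import soundfile as sf
import torchaudio

def postprocess_via_tempfile(final_wave, sample_rate, transform):
    # Sketch of the round trip: write to a temp WAV, transform it in place,
    # then reload and convert back to a NumPy array.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        sf.write(f.name, final_wave, sample_rate)  # assumed write step (not shown in the diff)
        transform(f.name)                          # e.g. remove_silence_for_generated_wav
        reloaded, _ = torchaudio.load(f.name)
    return reloaded.squeeze().cpu().numpy()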