Spaces:

nambn0321
/

T5_First_US_Accent

Sleeping

App Files Files Community

nambn0321 committed on Sep 17

Commit

203c275

verified ·

1 Parent(s): 657fb95

Create app.py

Browse files

Files changed (1) hide show

app.py +67 -0

app.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import torch
+import gradio as gr
+import torchaudio
+# from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
+from transformers.models.speecht5 import SpeechT5HifiGan
+# Load model and processor
+processor = SpeechT5Processor.from_pretrained("nambn0321/TTS_with_T5_4")
+model = SpeechT5ForTextToSpeech.from_pretrained(
+    "nambn0321/TTS_with_T5_4",
+    use_safetensors=True,
+    trust_remote_code=True
+)
+vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+# Move to CUDA if available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(device)
+vocoder = vocoder.to(device)
+def tts_generate(text):
+    print(f" Input text: {text}")
+    try:
+        # Preprocess input
+        print(" Processing input...")
+        inputs = processor(text=text, return_tensors="pt").to(device)
+        print(" Text processed.")
+        # Generate waveform directly (with vocoder)
+        print("🎤 Generating speech waveform...")
+        with torch.no_grad():
+            waveform = model.generate_speech(
+                inputs["input_ids"],
+                vocoder=vocoder
+            )
+        print(" Waveform generated.")
+        # Save waveform
+        output_path = "output.wav"
+        if waveform.dim() == 1:
+            waveform = waveform.unsqueeze(0)
+        torchaudio.save(output_path, waveform.cpu(), sample_rate=16000)
+        print(f" Audio saved to {output_path}")
+        return output_path
+    except Exception as e:
+        print(" Error during TTS generation:", e)
+        return "Error during speech synthesis."
+# Gradio interface
+demo = gr.Interface(
+    fn=tts_generate,
+    inputs=gr.Textbox(label="Enter text"),
+    outputs=gr.Audio(label="Generated Speech", type="filepath"),
+    title="SpeechT5 Text-to-Speech",
+    description="Enter text and hear it spoken with SpeechT5 + HiFi-GAN vocoder."
+)
+if __name__ == "__main__":
+    print(" Launching Gradio demo...")
+    demo.launch()