nambn0321 committed on
Commit
c6a2465
·
verified ·
1 Parent(s): f2dea3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -4,8 +4,6 @@ import torchaudio
4
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
5
  from transformers.models.speecht5 import SpeechT5HifiGan
6
 
7
- print(torch.cuda.is_available())
8
-
9
  # Load model and processor
10
  processor = SpeechT5Processor.from_pretrained("nambn0321/TTS_british_2nd_attempt")
11
  model = SpeechT5ForTextToSpeech.from_pretrained("nambn0321/TTS_british_2nd_attempt", use_safetensors=True, trust_remote_code=True)
@@ -30,15 +28,17 @@ def tts_generate(text):
30
  # Generate waveform directly (with vocoder)
31
  print("🎤 Generating speech waveform...")
32
  with torch.no_grad():
33
- mel_output, _ = model(input_ids=inputs["input_ids"])
34
- waveform = vocoder.decode(mel_output) # Using vocoder to decode
35
-
 
 
36
  print("✅ Waveform generated.")
37
 
38
  # Save waveform
39
  output_path = "output.wav"
40
  if waveform.dim() == 1:
41
- waveform = waveform.unsqueeze(0) # Ensure it's in batch format
42
  torchaudio.save(output_path, waveform.cpu(), sample_rate=16000)
43
  print(f"💾 Audio saved to {output_path}")
44
 
 
4
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
5
  from transformers.models.speecht5 import SpeechT5HifiGan
6
 
 
 
7
  # Load model and processor
8
  processor = SpeechT5Processor.from_pretrained("nambn0321/TTS_british_2nd_attempt")
9
  model = SpeechT5ForTextToSpeech.from_pretrained("nambn0321/TTS_british_2nd_attempt", use_safetensors=True, trust_remote_code=True)
 
28
  # Generate waveform directly (with vocoder)
29
  print("🎤 Generating speech waveform...")
30
  with torch.no_grad():
31
+ waveform = model.generate_speech(
32
+ inputs["input_ids"],
33
+ speaker_embedding,
34
+ vocoder=vocoder
35
+ )
36
  print("✅ Waveform generated.")
37
 
38
  # Save waveform
39
  output_path = "output.wav"
40
  if waveform.dim() == 1:
41
+ waveform = waveform.unsqueeze(0)
42
  torchaudio.save(output_path, waveform.cpu(), sample_rate=16000)
43
  print(f"💾 Audio saved to {output_path}")
44