Spaces:
Build error
Build error
| import torch | |
| import numpy as np | |
| import gradio as gr | |
| from transformers import AutoProcessor, AutoModel, pipeline, MarianMTModel, MarianTokenizer | |
# Run on the first CUDA device when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Speech recognition: Whisper (base) wrapped in a transformers ASR pipeline.
asr_pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
    device=device,
)

# Text-to-speech: Bark (small) processor and model; the model lives on `device`.
processor = AutoProcessor.from_pretrained("suno/bark-small")
model = AutoModel.from_pretrained("suno/bark-small").to(device)

# English -> Hindi text translation: fine-tuned MarianMT tokenizer and model.
# The Marian model is not moved to `device`; it stays wherever
# `from_pretrained` places it.
martian_mt_tokenizer = MarianTokenizer.from_pretrained(
    "AbhirupGhosh/opus-mt-finetuned-en-hi"
)
martian_mt_model = MarianMTModel.from_pretrained(
    "AbhirupGhosh/opus-mt-finetuned-en-hi"
)
def translate_english_to_hindi(english_text):
    """Translate English text to Hindi with the module-level MarianMT model.

    Args:
        english_text: The English text to translate.

    Returns:
        The Hindi translation with special tokens removed, or an empty string
        when ``english_text`` is empty, ``None`` or whitespace-only.
    """
    # Nothing to translate: skip the model instead of decoding whatever it
    # would generate for an empty prompt.
    if not english_text or not english_text.strip():
        return ""

    # Calling the tokenizer (rather than `.encode`) also yields the attention
    # mask; truncation keeps long transcripts within the tokenizer's limit.
    encoded = martian_mt_tokenizer(
        english_text, return_tensors="pt", truncation=True
    )
    # Keep the inputs on the same device as the model, wherever it was loaded.
    encoded = encoded.to(martian_mt_model.device)

    generated_token_ids = martian_mt_model.generate(**encoded, use_cache=True)

    # Let the tokenizer drop every special token rather than string-replacing
    # only "</s>" and "<pad>" afterwards.
    return martian_mt_tokenizer.decode(
        generated_token_ids[0], skip_special_tokens=True
    )
def translate_to_english(audio):
    """Turn speech into text with the module-level Whisper ASR pipeline.

    Args:
        audio: Audio input accepted by ``asr_pipe``; the Gradio inputs in this
            file are configured with ``type="filepath"``.

    Returns:
        The ``"text"`` field of the pipeline output.

    Raises:
        ValueError: If ``audio`` is ``None`` (e.g. nothing was recorded or
            uploaded before submitting).
    """
    if audio is None:
        raise ValueError("No audio input was provided.")

    # NOTE(review): the task is "transcribe", so the text presumably comes back
    # in the spoken language; confirm whether "translate" was intended, given
    # the function name and the English->Hindi model downstream.
    # `use_cache` must be a real boolean, not the string "True".
    outputs = asr_pipe(
        audio,
        generate_kwargs={"task": "transcribe", "use_cache": True},
    )
    return outputs["text"]
def synthesise(text):
    """Generate speech for ``text`` with the module-level Bark model.

    Args:
        text: The text to be spoken.

    Returns:
        The generated output as a NumPy array on the host.
    """
    encoded = processor(text=text, return_tensors="pt").to(device)
    waveform = model.generate(**encoded, use_cache=True)
    # Bring the result back to host memory before converting to NumPy.
    return waveform.cpu().numpy()
def speech_to_hindi_translation(audio):
    """Run the full pipeline: speech -> English text -> Hindi text -> speech.

    Args:
        audio: Audio input forwarded to ``translate_to_english``.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is an ``int16``
        NumPy array, the form expected by ``gr.Audio(type="numpy")``.
    """
    english_text = translate_to_english(audio)
    hindi_text = translate_english_to_hindi(english_text)

    # `synthesise` returns a batch; only one text was submitted, so take item 0.
    waveform = synthesise(hindi_text)[0]

    # Clip before scaling so out-of-range samples saturate instead of wrapping
    # around when cast to int16.
    pcm_samples = (np.clip(waveform, -1.0, 1.0) * 32767).astype(np.int16)

    # Report the rate the TTS model actually generates at instead of a
    # hard-coded 22050, which makes playback slow and pitch-shifted.
    # Bark exposes it on its generation config (24 kHz).
    sample_rate = getattr(model.generation_config, "sample_rate", 24000)
    return sample_rate, pcm_samples
# Text shown at the top of both tabs.
title = "Speech-To-Speech-Translation for Hindi"
description = """

"""

demo = gr.Blocks()

# The two tabs are identical apart from where the audio comes from, so build
# both interfaces from the same definition: microphone first, then file upload.
mic_translate, file_translate = (
    gr.Interface(
        fn=speech_to_hindi_translation,
        inputs=gr.Audio(source=audio_source, type="filepath"),
        outputs=gr.Audio(label="Generated Speech", type="numpy"),
        title=title,
        description=description,
    )
    for audio_source in ("microphone", "upload")
)

# Present the two interfaces as tabs inside a single Blocks app.
with demo:
    gr.TabbedInterface(
        [mic_translate, file_translate],
        ["Microphone", "Audio File"],
    )

demo.launch(debug=True)