Spaces: Running on Zero
import os
import sys

import librosa
import torch
import torchaudio

# Make the repository root importable; must run before the `modules.*` imports.
sys.path.append('.')
from modules.audio_tokenizer.audio_tokenizer import get_audio_tokenizer
from modules.audio_detokenizer.audio_detokenizer import get_audio_detokenizer, detokenize
def _split_prompt(path, sample_rate, prompt_sec):
    """Load ``path`` at ``sample_rate`` and split it into (prompt, target).

    The first ``prompt_sec`` seconds of samples form the prompt; every sample
    after that forms the target. Both are returned as the array slices
    produced by ``librosa.load``.

    Raises:
        ValueError: if the clip is not longer than ``prompt_sec`` seconds, so
            no target audio would remain after removing the prompt.
    """
    wav, _ = librosa.load(path, sr=sample_rate)
    split_at = sample_rate * prompt_sec
    if len(wav) <= split_at:
        raise ValueError(
            f"{path} has {len(wav)} samples at {sample_rate} Hz; need more "
            f"than {split_at} to leave audio after a {prompt_sec}s prompt"
        )
    return wav[:split_at], wav[split_at:]


def _to_cuda_batch(wav):
    """Return ``wav`` as a CUDA tensor with a new leading dimension of size 1."""
    return torch.tensor(wav)[None, :].cuda()


def main():
    """Tokenize ``en_prompt0.wav``, reconstruct it, and save the result.

    The first second of the clip is used as the prompt and the remainder as
    the audio to tokenize. The reconstruction returned by ``detokenize`` is
    written to ``test/tmp_recon_en_prompt0.wav`` at 24000 Hz.
    """
    source_path = "en_prompt0.wav"
    output_path = "test/tmp_recon_en_prompt0.wav"
    prompt_sec = 1

    audio_tokenizer = get_audio_tokenizer()
    audio_detokenizer = get_audio_detokenizer()

    # The 16 kHz audio goes to the tokenizer; the 24 kHz prompt goes to
    # detokenize, and 24000 is the rate used when saving. Presumably these
    # are the models' native rates -- confirm against the modules.
    prompt_16k, target_16k = _split_prompt(source_path, 16000, prompt_sec)
    # Only the prompt half is needed at 24 kHz; the 24 kHz target is unused.
    prompt_24k, _ = _split_prompt(source_path, 24000, prompt_sec)

    prompt_wav_24k = _to_cuda_batch(prompt_24k)
    prompt_wav_16k = _to_cuda_batch(prompt_16k)
    input_wav_16k = _to_cuda_batch(target_16k)

    semantic_token = audio_tokenizer.tokenize(input_wav_16k)
    prompt_semantic_token = audio_tokenizer.tokenize(prompt_wav_16k)

    recon_wav = detokenize(
        audio_detokenizer, semantic_token, prompt_wav_24k, prompt_semantic_token
    )
    print(recon_wav.shape)

    # Create the output directory up front so the save cannot fail just
    # because it is missing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    torchaudio.save(output_path, recon_wav.cpu(), 24000)
    print("All tests passed!")


if __name__ == '__main__':
    main()