Commit 24363dc · committed by CSH-1220
Parent: 1834911

Update how we load pre-trained weights
Files changed:
- app.py +0 -15
- audio_encoder/AudioMAE.py +6 -1
- pipeline/morph_pipeline_successed_ver1.py +7 -3
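The net effect: the two start-up hf_hub_download calls in app.py, which copied pretrained.pth and pytorch_model.bin into the Space's working directory via local_dir="./", are removed, and each weight file is now downloaded at the point where it is consumed (the AudioMAE encoder fetches pretrained.pth, the morph pipeline fetches pytorch_model.bin). Without local_dir, hf_hub_download resolves to a file inside the Hugging Face hub cache rather than the repository root; a minimal sketch of the new call pattern (repo id and filename taken from the diff):

from huggingface_hub import hf_hub_download

# Returns a path inside the hub cache (HF_HOME); the file is downloaded only on first call.
path = hf_hub_download(
    repo_id="DennisHung/Pre-trained_AudioMAE_weights",
    filename="pretrained.pth",
)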
app.py
CHANGED
@@ -3,21 +3,6 @@ import torch
 import torchaudio
 import numpy as np
 import gradio as gr
-from huggingface_hub import hf_hub_download
-model_path = hf_hub_download(
-    repo_id="DennisHung/Pre-trained_AudioMAE_weights",
-    filename="pretrained.pth",
-    local_dir="./",
-    local_dir_use_symlinks=False
-)
-
-model_path = hf_hub_download(
-    repo_id="DennisHung/Pre-trained_AudioMAE_weights",
-    filename="pytorch_model.bin",
-    local_dir="./",
-    local_dir_use_symlinks=False
-)
-
 from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline
 # Initialize AudioLDM2 Pipeline
 pipeline = AudioLDM2MorphPipeline.from_pretrained("cvssp/audioldm2-large", torch_dtype=torch.float32)
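With the downloads gone, app.py's start-up reduces to importing the pipeline module and constructing the pipeline. A hedged sketch of the remaining initialization (the device move is an assumption and does not appear in the file):

import torch
from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline

# Initialize AudioLDM2 Pipeline, as in app.py above.
pipeline = AudioLDM2MorphPipeline.from_pretrained("cvssp/audioldm2-large", torch_dtype=torch.float32)
pipeline = pipeline.to("cuda" if torch.cuda.is_available() else "cpu")  # assumed, not in the diff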
audio_encoder/AudioMAE.py
CHANGED
@@ -12,6 +12,7 @@ import librosa.display
 import matplotlib.pyplot as plt
 import numpy as np
 import torchaudio
+from huggingface_hub import hf_hub_download
 
 # model = mae_vit_base_patch16(in_chans=1, audio_exp=True, img_size=(1024, 128))
 class Vanilla_AudioMAE(nn.Module):
@@ -25,7 +26,11 @@ class Vanilla_AudioMAE(nn.Module):
             in_chans=1, audio_exp=True, img_size=(1024, 128)
         )
 
-        checkpoint_path = 'pretrained.pth'
+        # checkpoint_path = 'pretrained.pth'
+        checkpoint_path = hf_hub_download(
+            repo_id="DennisHung/Pre-trained_AudioMAE_weights",
+            filename="pretrained.pth"
+        )
         checkpoint = torch.load(checkpoint_path, map_location='cpu')
         msg = model.load_state_dict(checkpoint['model'], strict=False)
 
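Inside Vanilla_AudioMAE the checkpoint is therefore no longer expected at a hard-coded relative path; hf_hub_download fetches and caches it, and torch.load consumes the returned path. Because the state dict is applied with strict=False, keys the encoder module does not define are tolerated rather than raising. A small sketch for inspecting what actually loads (repo id and filename from the diff; the 'model' entry is the key the code above indexes):

from huggingface_hub import hf_hub_download
import torch

checkpoint_path = hf_hub_download(
    repo_id="DennisHung/Pre-trained_AudioMAE_weights",
    filename="pretrained.pth",
)
checkpoint = torch.load(checkpoint_path, map_location="cpu")

# The module loads checkpoint['model'] with strict=False, so load_state_dict
# reports missing/unexpected keys in its return value instead of failing.
print(list(checkpoint.keys()))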
pipeline/morph_pipeline_successed_ver1.py
CHANGED
@@ -49,8 +49,7 @@ if is_librosa_available():
     import librosa
 import warnings
 import matplotlib.pyplot as plt
-
-
+from huggingface_hub import hf_hub_download
 from .pipeline_audioldm2 import AudioLDM2Pipeline
 
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -91,7 +90,12 @@ for name in unet.attn_processors.keys():
     else:
         attn_procs[name] = AttnProcessor2_0()
 
-
+adapter_weight = hf_hub_download(
+    repo_id="DennisHung/Pre-trained_AudioMAE_weights",
+    filename="pytorch_model.bin",
+)
+
+state_dict = torch.load(adapter_weight, map_location=DEVICE)
 for name, processor in attn_procs.items():
     if hasattr(processor, 'to_v_ip') or hasattr(processor, 'to_k_ip'):
         weight_name_v = name + ".to_v_ip.weight"
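The adapter weights follow the same pattern: pytorch_model.bin is fetched from the hub, loaded onto DEVICE, and the loop then assigns per-processor tensors by key name. Only the to_v_ip key construction is visible in the diff; the rest of this sketch is an assumed continuation of that loop:

# Hedged sketch: distribute the downloaded state_dict across the IP attention processors.
for name, processor in attn_procs.items():
    if hasattr(processor, 'to_v_ip') or hasattr(processor, 'to_k_ip'):
        weight_name_v = name + ".to_v_ip.weight"
        weight_name_k = name + ".to_k_ip.weight"  # assumed counterpart key
        if weight_name_v in state_dict:
            processor.to_v_ip.weight = torch.nn.Parameter(state_dict[weight_name_v].to(DEVICE))
        if weight_name_k in state_dict:
            processor.to_k_ip.weight = torch.nn.Parameter(state_dict[weight_name_k].to(DEVICE))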