Commit 93dbff3 · Parent: a29f249 · "Fix bug 4"

app.py (CHANGED)
@@ -29,6 +29,7 @@ from src.agent import Agent
 from src.csgo.web_action_processing import WebCSGOAction, web_keys_to_csgo_action_names
 from src.envs import WorldModelEnv
 from src.game.web_play_env import WebPlayEnv
+from src.utils import extract_state_dict
 from config_web import web_config
 
 # Configure logging
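The only change in this hunk is the new `from src.utils import extract_state_dict` import. The second hunk below calls it as `extract_state_dict(state_dict, "denoiser")` and feeds the result to `agent.denoiser.load_state_dict`, which implies prefix-stripping semantics. A hypothetical sketch under that assumption (not the actual src.utils implementation):

```python
from collections import OrderedDict


def extract_state_dict_sketch(state_dict, module_name):
    """Keep keys that start with `module_name` and strip that prefix so the
    result can be passed to the sub-module's load_state_dict()."""
    return OrderedDict(
        (key.split(".", 1)[1], value)
        for key, value in state_dict.items()
        if key.startswith(module_name)
    )
```

For a checkpoint key such as `denoiser.inner_model.conv.weight`, this sketch would return `inner_model.conv.weight`, which is the shape of key that `agent.denoiser.load_state_dict` expects.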
@@ -87,39 +88,56 @@ class WebGameEngine:
         import time
         self.time_module = time
 
-    async def
-        """
+    async def _load_model_from_url_async(self, agent, device):
+        """Load model from URL using torch.hub (HF Spaces compatible)"""
         import asyncio
         import concurrent.futures
-        import urllib.request
-        import os
 
-        def
-            """
+        def load_model_weights():
+            """Load model weights in thread pool to avoid blocking"""
+            try:
+                # Use torch.hub.load_state_dict_from_url which is HF Spaces compatible
+                model_url = "https://huggingface.co/Etadingrui/diamond-1B/resolve/main/agent_epoch_00003.pt"
+                logger.info(f"Loading model from {model_url} using torch.hub...")
+
+                # Update progress
+                self.download_progress = 10
+                self.loading_status = "Downloading model with torch.hub..."
+
+                # Load state dict directly from URL (handles caching automatically)
+                state_dict = torch.hub.load_state_dict_from_url(
+                    model_url,
+                    map_location=device,
+                    progress=True  # Show download progress
+                )
+
+                self.download_progress = 80
+                self.loading_status = "Loading model weights..."
+
+                # Load each component of the agent using extract_state_dict (same as agent.load method)
+                if any(k.startswith("denoiser") for k in state_dict.keys()):
+                    agent.denoiser.load_state_dict(extract_state_dict(state_dict, "denoiser"))
+                if any(k.startswith("upsampler") for k in state_dict.keys()) and agent.upsampler is not None:
+                    agent.upsampler.load_state_dict(extract_state_dict(state_dict, "upsampler"))
+                if any(k.startswith("rew_end_model") for k in state_dict.keys()) and agent.rew_end_model is not None:
+                    agent.rew_end_model.load_state_dict(extract_state_dict(state_dict, "rew_end_model"))
+                if any(k.startswith("actor_critic") for k in state_dict.keys()) and agent.actor_critic is not None:
+                    agent.actor_critic.load_state_dict(extract_state_dict(state_dict, "actor_critic"))
+
+                self.download_progress = 100
+                self.loading_status = "Model loaded successfully!"
+                return True
+
+            except Exception as e:
+                logger.error(f"Failed to load model from URL: {e}")
+                return False
 
-        # Run
+        # Run in thread pool to avoid blocking
         loop = asyncio.get_event_loop()
         with concurrent.futures.ThreadPoolExecutor() as executor:
-            await loop.run_in_executor(executor,
+            success = await loop.run_in_executor(executor, load_model_weights)
 
+        return success
 
     async def initialize_models(self):
         """Initialize the AI models and environment"""
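The new `_load_model_from_url_async` method wraps a blocking `torch.hub` download in a thread pool so the asyncio event loop serving the web UI stays responsive. A minimal, self-contained sketch of that pattern (the helper names `fetch_state_dict` and `load_weights_async` are illustrative, not part of app.py; only the checkpoint URL is taken from the diff):

```python
import asyncio
import concurrent.futures

import torch


def fetch_state_dict(url, device="cpu"):
    """Blocking download; torch.hub caches the file locally after the first call."""
    return torch.hub.load_state_dict_from_url(url, map_location=device, progress=True)


async def load_weights_async(url, device="cpu"):
    """Run the blocking download on a worker thread so the event loop keeps serving requests."""
    loop = asyncio.get_running_loop()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        return await loop.run_in_executor(executor, lambda: fetch_state_dict(url, device))


# Example usage with the checkpoint URL from the diff:
# state_dict = asyncio.run(load_weights_async(
#     "https://huggingface.co/Etadingrui/diamond-1B/resolve/main/agent_epoch_00003.pt"))
```

By default `torch.hub.load_state_dict_from_url` caches the checkpoint under `~/.cache/torch/hub/checkpoints`, so a warm restart can skip the download if that directory survives.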
@@ -159,43 +177,20 @@ class WebGameEngine:
 
         # Try to load checkpoint (remote first, then local, then dummy mode)
         try:
-            # First try to
-            # Download to cache directory
-            cache_dir = "./cache"
-            os.makedirs(cache_dir, exist_ok=True)
-            model_cache_path = os.path.join(cache_dir, "agent_epoch_00003.pt")
-
-            # Download if not cached
-            if not os.path.exists(model_cache_path):
-                logger.info(f"Downloading 1.53GB model to {model_cache_path}...")
-                self.loading_status = "Downloading AI model from Hugging Face Hub..."
-
-                # Download with progress tracking in a separate thread
-                await self._download_model_async(model_url, model_cache_path)
-            else:
-                logger.info(f"Using cached model from {model_cache_path}")
-                self.loading_status = "Loading cached model..."
-
-            # Use the agent's load method which expects a file path
-            self.loading_status = "Loading model weights..."
-            agent.load(model_cache_path)
-            logger.info(f"Successfully loaded checkpoint from HF Hub")
-
-        except Exception as hub_error:
-            logger.warning(f"Failed to download from HF Hub: {hub_error}")
-
+            # First try to load from Hugging Face Hub using torch.hub
+            logger.info("Loading model from Hugging Face Hub with torch.hub...")
+
+            success = await self._load_model_from_url_async(agent, device)
+
+            if success:
+                logger.info("Successfully loaded checkpoint from HF Hub")
+            else:
             # Fallback to local checkpoint if available
+                logger.warning("Failed to load from HF Hub, trying local checkpoint...")
             checkpoint_path = web_config.get_checkpoint_path()
             if checkpoint_path.exists():
-                logger.info(f"
+                logger.info(f"Loading local checkpoint: {checkpoint_path}")
+                self.loading_status = "Loading local checkpoint..."
                 agent.load(checkpoint_path)
                 logger.info(f"Successfully loaded local checkpoint: {checkpoint_path}")
             else:
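Because the fallback logic in this hunk is spread across interleaved added and removed lines, here is a condensed, illustrative view of the resulting control flow in `initialize_models` (remote checkpoint first, then local checkpoint; dummy mode lives outside this hunk). `engine`, `agent`, `device`, and `web_config` stand in for the real objects in app.py:

```python
import logging

logger = logging.getLogger(__name__)


async def load_checkpoint(engine, agent, device, web_config):
    """Illustrative summary of the new loading order; not the literal app.py code."""
    success = await engine._load_model_from_url_async(agent, device)
    if success:
        logger.info("Successfully loaded checkpoint from HF Hub")
        return
    logger.warning("Failed to load from HF Hub, trying local checkpoint...")
    checkpoint_path = web_config.get_checkpoint_path()
    if checkpoint_path.exists():
        agent.load(checkpoint_path)
        logger.info(f"Successfully loaded local checkpoint: {checkpoint_path}")
    # else: fall through to dummy mode (handled later in app.py)
```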