Spaces:

BladeSzaSza
/

digiPal

Paused

BladeSzaSza Claude committed on Jun 26

Commit

5ed6938

1 Parent(s): 4cfc1e9

integrate Hunyuan3D API via gradio_client

- Replace local model loading with API calls to tencent/Hunyuan3D-2.1
- Use generation_all endpoint for both shape and texture generation
- Add proper image handling and temporary file management
- Maintain fallback 3D generation for reliability
- Remove deprecated transformers-based approach

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>

Files changed (1) hide show

models/model_3d_generator.py +111 -275

models/model_3d_generator.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import Union, Optional, Dict, Any
 from pathlib import Path
 import os
 import logging
 # Set up detailed logging for 3D generation
 logging.basicConfig(level=logging.INFO)
@@ -62,151 +63,38 @@ class Hunyuan3DGenerator:
             return False
     def load_model(self):
-        """Lazy load the 3D generation model"""
         if self.model is None:
-            logger.info("🚀 Starting 3D model loading process...")
             try:
-                # Try to import Hunyuan3D components
-                logger.info("📦 Attempting to import Hunyuan3D components...")
                 try:
-                    from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline
-                    from hy3dshape.rembg import BackgroundRemover
-                    logger.info("✅ Hunyuan3D components imported successfully")
-                    # Load the pipeline
-                    model_id = self.lite_model_id if self.use_lite else self.model_id
-                    logger.info(f"📦 Loading Hunyuan3D pipeline: {model_id}")
-                    self.model = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(model_id)
-                    self.bg_remover = BackgroundRemover()
-                    logger.info("✅ Hunyuan3D pipeline loaded successfully")
                 except ImportError as import_error:
-                    logger.error(f"❌ Failed to import Hunyuan3D components: {import_error}")
-                    logger.info("🔄 Hunyuan3D not installed, trying alternative approach...")
-                    # Fallback: Try using transformers AutoModel
-                    logger.info("📦 Importing transformers components...")
-                    from transformers import AutoModel, AutoProcessor
-                    model_id = self.lite_model_id if self.use_lite else self.model_id
-                    logger.info(f"📦 Loading model: {model_id}")
-                    # Check if model exists on HuggingFace
-                    try:
-                        from huggingface_hub import model_info
-                        info = model_info(model_id)
-                        logger.info(f"✅ Model found on HuggingFace: {info.modelId}")
-                    except Exception as hub_error:
-                        logger.error(f"❌ Model not found on HuggingFace: {hub_error}")
-                        logger.info("🔄 Using fallback 3D generation")
-                        self.model = "fallback"
-                        return
-                    # Load preprocessor
-                    logger.info("📦 Loading preprocessor...")
-                    try:
-                        self.preprocessor = AutoProcessor.from_pretrained(model_id)
-                        logger.info("✅ Preprocessor loaded successfully")
-                    except Exception as proc_error:
-                        logger.error(f"❌ Preprocessor loading failed: {proc_error}")
-                        logger.info("🔄 Using fallback mode")
-                        self.model = "fallback"
-                        return
-                    # Load model with optimizations
-                    torch_dtype = torch.float16 if self.device == "cuda" else torch.float32
-                    logger.info(f"📦 Using torch dtype: {torch_dtype}")
-                    # Disable torch.compile to avoid dynamo issues
-                    logger.info("📦 Disabling torch compile to avoid dynamo issues...")
-                    torch._dynamo.config.suppress_errors = True
-                    logger.info("📦 Loading 3D model with safe device handling...")
-                    # Try loading with different strategies
-                    loading_successful = False
-                    # Strategy 1: Load directly to device
-                    try:
-                        logger.info("📦 Strategy 1: Direct device loading...")
-                        self.model = AutoModel.from_pretrained(
-                            model_id,
-                            torch_dtype=torch_dtype,
-                            device_map={"": self.device},
-                            low_cpu_mem_usage=True,
-                            trust_remote_code=True
-                        )
-                        loading_successful = True
-                        logger.info("✅ Direct device loading successful")
-                    except Exception as e1:
-                        logger.error(f"❌ Strategy 1 failed: {e1}")
-                    # Strategy 2: Load to CPU first
-                    if not loading_successful:
-                        try:
-                            logger.info("📦 Strategy 2: CPU-first loading...")
-                            # Load model to CPU first to avoid meta tensor issues
-                            self.model = AutoModel.from_pretrained(
-                                model_id,
-                                torch_dtype=torch.float32,  # Use float32 for CPU loading
-                                low_cpu_mem_usage=True,
-                                device_map=None,  # No device mapping initially
-                                trust_remote_code=True
-                            )
-                            logger.info("✅ 3D model loaded to CPU")
-                            # Now safely move to target device
-                            logger.info(f"📦 Moving model to target device: {self.device}")
-                            try:
-                                if self.device == "cuda":
-                                    # Convert to appropriate dtype for GPU
-                                    self.model = self.model.to(device=self.device, dtype=torch.float16)
-                                    logger.info("✅ Model moved to CUDA with fp16")
-                                else:
-                                    # Keep on CPU
-                                    self.model = self.model.to(device="cpu", dtype=torch.float32)
-                                    logger.info("✅ Model kept on CPU with fp32")
-                                loading_successful = True
-                            except Exception as device_error:
-                                logger.error(f"❌ Device movement failed: {device_error}")
-                                logger.info("🔄 Falling back to CPU...")
-                                self.device = "cpu"
-                                if self.model is not None:
-                                    self.model = self.model.to("cpu", dtype=torch.float32)
-                                    loading_successful = True
-                                else:
-                                    logger.error("❌ Model is None, using fallback mode")
-                                    self.model = "fallback"
-                        except Exception as e2:
-                            logger.error(f"❌ Strategy 2 failed: {e2}")
-                    # If all strategies failed, use fallback
-                    if not loading_successful:
-                        logger.error("❌ All loading strategies failed")
-                        logger.info("🔄 Using fallback 3D generation")
-                        self.model = "fallback"
-                        return
-                    # Enable optimizations safely
-                    logger.info("📦 Applying model optimizations...")
-                    if self.model != "fallback" and hasattr(self.model, 'enable_attention_slicing'):
-                        self.model.enable_attention_slicing()
-                        logger.info("✅ Attention slicing enabled")
-                    else:
-                        logger.info("⚠️ Attention slicing not available")
-                    logger.info("🎉 3D model loading completed successfully!")
             except Exception as e:
-                logger.error(f"❌ Failed to load Hunyuan3D model: {e}")
-                logger.error(f"❌ Error type: {type(e).__name__}")
                 logger.info("🔄 Falling back to simple 3D generation...")
-                # Model loading failed, will use fallback
-                self.model = "fallback"
     def image_to_3d(self,
                    image: Union[str, Image.Image, np.ndarray],
@@ -229,7 +117,7 @@ class Hunyuan3DGenerator:
                 logger.info("✅ Model already loaded")
             # If model loading failed, use fallback
-            if self.model == "fallback":
                 logger.info("🔄 Using fallback 3D generation...")
                 return self._generate_fallback_3d(image)
@@ -237,124 +125,78 @@ class Hunyuan3DGenerator:
             logger.info("🖼️ Preparing input image...")
             if isinstance(image, str):
                 logger.info(f"🖼️ Loading image from path: {image}")
                 image = Image.open(image)
             elif isinstance(image, np.ndarray):
                 logger.info("🖼️ Converting numpy array to PIL Image")
                 image = Image.fromarray(image)
             else:
                 logger.info("🖼️ Input is already PIL Image")
-            # Ensure RGBA for Hunyuan3D
-            logger.info(f"🖼️ Image mode: {image.mode}")
-            if image.mode != 'RGBA':
-                logger.info("🖼️ Converting image to RGBA mode")
-                image = image.convert('RGBA')
-            logger.info(f"🖼️ Final image size: {image.size}")
-            # Remove background if requested
-            if remove_background and image.mode == 'RGB':
-                logger.info("🎭 Removing background from image...")
-                try:
-                    if hasattr(self, 'bg_remover'):
-                        # Use Hunyuan3D's background remover
-                        image = self.bg_remover(image)
-                        logger.info("✅ Background removed using Hunyuan3D remover")
-                    else:
-                        # Use fallback background removal
-                        image = self._remove_background(image)
-                        logger.info("✅ Background removed using fallback method")
-                except Exception as bg_error:
-                    logger.error(f"❌ Background removal failed: {bg_error}")
-                    logger.info("🔄 Continuing with original image...")
-            # Check if we have the Hunyuan3D pipeline
-            if hasattr(self.model, '__call__') and hasattr(self, 'bg_remover'):
-                # Using Hunyuan3D pipeline
-                logger.info("🧠 Using Hunyuan3D pipeline for 3D generation...")
                 try:
-                    # Generate 3D model using Hunyuan3D
-                    logger.info("🚀 Starting Hunyuan3D generation...")
-                    mesh_outputs = self.model(image=image)
-                    if isinstance(mesh_outputs, list) and len(mesh_outputs) > 0:
-                        mesh = mesh_outputs[0]
-                        logger.info("✅ 3D mesh generated successfully")
-                        # Save mesh
-                        logger.info("💾 Saving generated mesh...")
-                        mesh_path = self._save_mesh(mesh)
-                        logger.info(f"✅ Mesh saved to: {mesh_path}")
-                        return mesh_path
                     else:
-                        logger.error("❌ No mesh output from Hunyuan3D")
-                        raise Exception("Empty mesh output")
-                except Exception as hunyuan_error:
-                    logger.error(f"❌ Hunyuan3D generation failed: {hunyuan_error}")
                     logger.info("🔄 Falling back to alternative generation...")
                     return self._generate_fallback_3d(image)
             else:
-                # Using transformers-based approach (original code)
-                logger.info("🧠 Using transformers-based 3D generation...")
-                # Resize for processing
-                logger.info("🖼️ Resizing image for processing (512x512)...")
-                image = image.resize((512, 512), Image.Resampling.LANCZOS)
-                logger.info("✅ Image resized successfully")
-                # Process with model
-                logger.info("🧠 Starting model inference...")
-                with torch.no_grad():
-                    try:
-                        # Preprocess image
-                        logger.info("🔄 Preprocessing image for model...")
-                        inputs = self.preprocessor(images=image, return_tensors="pt")
-                        logger.info(f"🔄 Input tensor shape: {inputs['pixel_values'].shape if 'pixel_values' in inputs else 'unknown'}")
-                        # Move inputs to device safely
-                        logger.info(f"🔄 Moving inputs to device: {self.device}")
-                        try:
-                            # Avoid device-related dynamo issues
-                            device_str = str(self.device)  # Convert to string to avoid torch.device in dynamo
-                            inputs = {k: v.to(device_str) for k, v in inputs.items() if hasattr(v, 'to')}
-                            logger.info("✅ Inputs moved to device successfully")
-                        except Exception as device_error:
-                            logger.error(f"❌ Failed to move inputs to device: {device_error}")
-                            raise device_error
-                        # Generate 3D
-                        logger.info("🚀 Starting 3D generation inference...")
-                        logger.info(f"🚀 Parameters: steps={self.num_inference_steps}, guidance={self.guidance_scale}")
-                        outputs = self.model.generate(
-                            **inputs,
-                            num_inference_steps=self.num_inference_steps,
-                            guidance_scale=self.guidance_scale,
-                            texture_resolution=texture_resolution
-                        )
-                        logger.info("✅ 3D generation completed successfully")
-                        # Extract mesh
-                        logger.info("🔧 Extracting mesh from model outputs...")
-                        mesh = self._extract_mesh(outputs)
-                        logger.info("✅ Mesh extraction completed")
-                    except Exception as inference_error:
-                        logger.error(f"❌ Model inference failed: {inference_error}")
-                        logger.error(f"❌ Inference error type: {type(inference_error).__name__}")
-                        raise inference_error
-                # Save mesh
-                logger.info("💾 Saving generated mesh...")
-                mesh_path = self._save_mesh(mesh)
-                logger.info(f"✅ Mesh saved to: {mesh_path}")
-                logger.info("🎉 3D generation process completed successfully!")
-                return mesh_path
         except Exception as e:
             logger.error(f"❌ 3D generation error: {e}")
@@ -387,40 +229,6 @@ class Hunyuan3DGenerator:
             image.putdata(new_data)
             return image
-    def _extract_mesh(self, model_outputs: Dict[str, Any]) -> trimesh.Trimesh:
-        """Extract mesh from model outputs"""
-        # This would depend on actual Hunyuan3D output format
-        # Placeholder implementation
-        if 'vertices' in model_outputs and 'faces' in model_outputs:
-            vertices = model_outputs['vertices'].cpu().numpy()
-            faces = model_outputs['faces'].cpu().numpy()
-            # Create trimesh object
-            mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
-            # Add texture if available
-            if 'texture' in model_outputs:
-                # Apply texture to mesh
-                pass
-            return mesh
-        else:
-            # Create a simple mesh if outputs are different
-            return self._create_simple_mesh()
-    def _create_simple_mesh(self) -> trimesh.Trimesh:
-        """Create a simple placeholder mesh"""
-        # Create a simple sphere as placeholder
-        mesh = trimesh.creation.icosphere(subdivisions=3, radius=1.0)
-        # Add some variation
-        mesh.vertices += np.random.normal(0, 0.05, mesh.vertices.shape)
-        # Smooth the mesh
-        mesh = mesh.smoothed()
-        return mesh
     def _generate_fallback_3d(self, image: Union[Image.Image, np.ndarray]) -> str:
         """Generate fallback 3D model when main model fails"""
@@ -494,6 +302,36 @@ class Hunyuan3DGenerator:
         return mesh_path
     def text_to_3d(self, text_prompt: str) -> str:
         """Generate 3D model from text description"""
         # First generate image, then convert to 3D
@@ -501,15 +339,13 @@ class Hunyuan3DGenerator:
         raise NotImplementedError("Text to 3D requires image generation first")
     def to(self, device: str):
-        """Move model to specified device"""
         self.device = device
-        if self.model and self.model != "fallback":
-            self.model.to(device)
     def __del__(self):
         """Cleanup when object is destroyed"""
-        if self.model and self.model != "fallback":
-            del self.model
-        if self.preprocessor:
-            del self.preprocessor
-        torch.cuda.empty_cache()

 from pathlib import Path
 import os
 import logging
+import random
 # Set up detailed logging for 3D generation
 logging.basicConfig(level=logging.INFO)
             return False
     def load_model(self):
+        """Initialize the Gradio client for the Hunyuan3D API.
+
+        Does nothing when ``self.model`` is already set. On success the client
+        is stored in ``self.client``, ``handle_file`` is kept on the instance
+        and ``self.model`` becomes ``"gradio_api"``. On an import error or any
+        other exception ``self.model`` becomes ``"fallback_mode"``; nothing is
+        raised to the caller.
+        """
         if self.model is None:
+            logger.info("🚀 Starting Hunyuan3D API client initialization...")
             try:
+                # Try to import gradio_client
+                logger.info("📦 Attempting to import gradio_client...")
                 try:
+                    # Imported here rather than at module level, so a missing
+                    # package only selects fallback mode
+                    from gradio_client import Client, handle_file
+                    logger.info("✅ gradio_client imported successfully")
+                    # Initialize Hunyuan3D client
+                    logger.info("🌐 Connecting to Hunyuan3D API...")
+                    self.client = Client("tencent/Hunyuan3D-2.1")
+                    # Kept on the instance so image_to_3d can wrap file paths
+                    self.handle_file = handle_file
+                    # Marker string that image_to_3d checks to choose the API path
+                    self.model = "gradio_api"
+                    logger.info("✅ Hunyuan3D API client initialized successfully")
                 except ImportError as import_error:
+                    logger.error(f"❌ Failed to import gradio_client: {import_error}")
+                    logger.info("💡 Please install gradio_client:")
+                    logger.info("   pip install gradio_client")
+                    logger.info("🔄 Using fallback mode instead...")
+                    self.model = "fallback_mode"
+                    return
             except Exception as e:
+                # Any non-import failure (for example while constructing the
+                # Client) also selects fallback mode
+                logger.error(f"❌ Failed to initialize Hunyuan3D API client: {e}")
                 logger.info("🔄 Falling back to simple 3D generation...")
+                self.model = "fallback_mode"
     def image_to_3d(self,
                    image: Union[str, Image.Image, np.ndarray],
                 logger.info("✅ Model already loaded")
             # If model loading failed, use fallback
+            if self.model == "fallback_mode":
                 logger.info("🔄 Using fallback 3D generation...")
                 return self._generate_fallback_3d(image)
             logger.info("🖼️ Preparing input image...")
             if isinstance(image, str):
                 logger.info(f"🖼️ Loading image from path: {image}")
+                image_path = image
                 image = Image.open(image)
             elif isinstance(image, np.ndarray):
                 logger.info("🖼️ Converting numpy array to PIL Image")
                 image = Image.fromarray(image)
+                # Save to temp file for gradio client
+                image_path = self._save_temp_image(image)
             else:
                 logger.info("🖼️ Input is already PIL Image")
+                # Save to temp file for gradio client
+                image_path = self._save_temp_image(image)
+            logger.info(f"🖼️ Image mode: {image.mode}, size: {image.size}")
+            # Check if we have the Gradio API client
+            if self.model == "gradio_api" and hasattr(self, 'client'):
+                logger.info("🌐 Using Hunyuan3D Gradio API for 3D generation...")
                 try:
+                    # Generate 3D model using Hunyuan3D API
+                    logger.info("🚀 Starting Hunyuan3D API generation...")
+                    # Use generation_all for both shape and texture
+                    logger.info("📤 Calling generation_all API...")
+                    result = self.client.predict(
+                        image=self.handle_file(image_path),
+                        mv_image_front=None,
+                        mv_image_back=None,
+                        mv_image_left=None,
+                        mv_image_right=None,
+                        steps=self.num_inference_steps,
+                        guidance_scale=self.guidance_scale,
+                        seed=random.randint(1, 10000),
+                        octree_resolution=self.resolution,
+                        check_box_rembg=remove_background,
+                        num_chunks=8000,
+                        randomize_seed=True,
+                        api_name="/generation_all"
+                    )
+                    logger.info("✅ API call completed successfully")
+                    logger.info(f"📊 Result type: {type(result)}, length: {len(result) if isinstance(result, (list, tuple)) else 'N/A'}")
+                    # Extract mesh file from result
+                    # Result format: [shape_file, texture_file, html_output, mesh_stats, seed]
+                    if isinstance(result, (list, tuple)) and len(result) >= 2:
+                        shape_file = result[0]  # Shape file path
+                        texture_file = result[1]  # Textured file path (if available)
+                        # Use textured file if available, otherwise use shape file
+                        mesh_file = texture_file if texture_file else shape_file
+                        logger.info(f"✅ Generated mesh file: {mesh_file}")
+                        # Copy to our output location
+                        output_path = self._save_output_mesh(mesh_file)
+                        logger.info(f"✅ Mesh saved to: {output_path}")
+                        return output_path
                     else:
+                        logger.error("❌ Unexpected result format from Hunyuan3D API")
+                        raise Exception("Invalid API response format")
+                except Exception as api_error:
+                    logger.error(f"❌ Hunyuan3D API generation failed: {api_error}")
                     logger.info("🔄 Falling back to alternative generation...")
                     return self._generate_fallback_3d(image)
             else:
+                # Fallback to simple 3D generation
+                logger.info("🔄 No API client available, using fallback...")
+                return self._generate_fallback_3d(image)
         except Exception as e:
             logger.error(f"❌ 3D generation error: {e}")
             image.putdata(new_data)
             return image
     def _generate_fallback_3d(self, image: Union[Image.Image, np.ndarray]) -> str:
         """Generate fallback 3D model when main model fails"""
         return mesh_path
+    def _save_temp_image(self, image: Image.Image) -> str:
+        """Save PIL image to temporary file for gradio client"""
+        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
+            image_path = tmp.name
+        # Save image
+        image.save(image_path, 'PNG')
+        logger.info(f"💾 Saved temp image to: {image_path}")
+        return image_path
+    def _save_output_mesh(self, source_mesh_path: str) -> str:
+        """Copy generated mesh to our output location"""
+        import shutil
+        # Create output directory if it doesn't exist
+        output_dir = "/tmp/hunyuan3d_output"
+        os.makedirs(output_dir, exist_ok=True)
+        # Generate unique filename
+        timestamp = tempfile.mktemp().split('/')[-1]
+        output_filename = f"hunyuan3d_mesh_{timestamp}.glb"
+        output_path = os.path.join(output_dir, output_filename)
+        # Copy the file
+        shutil.copy2(source_mesh_path, output_path)
+        logger.info(f"📁 Copied mesh from {source_mesh_path} to {output_path}")
+        return output_path
     def text_to_3d(self, text_prompt: str) -> str:
         """Generate 3D model from text description"""
         # First generate image, then convert to 3D
         raise NotImplementedError("Text to 3D requires image generation first")
     def to(self, device: str):
+        """Record the preferred device; no model is moved.
+
+        Args:
+            device: Device name stored on ``self.device``.
+        """
         self.device = device
+        logger.info(f"🔧 Device preference updated to: {device}")
     def __del__(self):
         """Cleanup when object is destroyed"""
+        # self.client may be absent, e.g. when load_model() never connected
+        if hasattr(self, 'client'):
+            del self.client
+        # Only ask torch to release cached CUDA memory when CUDA is available
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()