Spaces:

chenguittiMaroua
/

asm-app

Sleeping

App Files Files Community

chenguittiMaroua committed on Apr 11

Commit

360e004

verified ·

1 Parent(s): 7a504cf

Update main.py

Browse files

Files changed (1) hide show

main.py +109 -71

main.py CHANGED Viewed

@@ -433,12 +433,79 @@ def generate_visualization_code(df: pd.DataFrame, request: VisualizationRequest)
 from typing import Optional
 def interpret_natural_language(prompt: str, df_columns: list) -> Optional[VisualizationRequest]:
-    """Convert natural language prompt to visualization parameters"""
     if not prompt or not df_columns:
         return None
-    prompt = prompt.lower()
-    # [rest of your existing function...]
 # ===== DYNAMIC VISUALIZATION FUNCTIONS =====
 def read_any_excel(content: bytes) -> pd.DataFrame:
@@ -782,85 +849,56 @@ async def visualize_with_natural_language(
     style: str = Form("seaborn-v0_8")
 ):
     try:
-        # Debugging: Log incoming request
-        logger.info(f"Incoming request with file: {file.filename if file else 'None'}")
-        # Verify file exists and has content
-        if not file or not file.filename:
-            logger.error("No file uploaded")
-            raise HTTPException(400, "Please upload an Excel file")
-        # Read file content
-        content = await file.read()
-        if not content:
-            logger.error("Empty file uploaded")
-            raise HTTPException(400, "The uploaded file is empty")
-        # Verify Excel file extension
-        file_ext = file.filename.split('.')[-1].lower()
-        if file_ext not in {"xlsx", "xls"}:
-            logger.error(f"Unsupported file type: {file_ext}")
-            raise HTTPException(400, "Only Excel files (.xlsx, .xls) are supported")
-        # Read Excel file with multiple engine fallbacks
-        try:
-            df = pd.read_excel(BytesIO(content), engine='openpyxl')
-        except Exception as e:
-            logger.warning(f"Openpyxl failed, trying xlrd: {str(e)}")
-            try:
-                df = pd.read_excel(BytesIO(content), engine='xlrd')
-            except Exception as e:
-                logger.error(f"Excel read failed: {str(e)}")
-                raise HTTPException(400, "Failed to read Excel file - may be corrupt or password protected")
-        if df.empty:
-            logger.error("Empty DataFrame after reading Excel")
-            raise HTTPException(400, "Excel file contains no data")
-        # Generate prompt if empty
         if not prompt.strip():
             prompt = generate_smart_prompt(df)
             logger.info(f"Auto-generated prompt: {prompt}")
-        # Create visualization
-        vis_request = interpret_natural_language(prompt, df.columns.tolist())
-        if not vis_request:
-            logger.error("Could not interpret visualization request")
-            raise HTTPException(400, "Could not understand your visualization request")
         vis_request.style = style
-        # Generate visualization
-        try:
-            visualization_code = generate_dynamic_visualization_code(df, vis_request)
-            plt.style.use(vis_request.style)
-            fig, ax = plt.subplots(figsize=(10, 6))
-            exec(visualization_code, {'plt': plt, 'sns': sns, 'df': df, 'np': np})
-            buffer = BytesIO()
-            plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight')
-            plt.close()
-            buffer.seek(0)
-            return {
-                "status": "success",
-                "image_data": base64.b64encode(buffer.getvalue()).decode('utf-8'),
-                "code": visualization_code,
-                "columns": list(df.columns),
-                "prompt": prompt
-            }
-        except Exception as e:
-            logger.error(f"Visualization failed: {str(e)}")
-            raise HTTPException(400, f"Failed to generate visualization: {str(e)}")
     except HTTPException as he:
         raise
     except Exception as e:
         logger.error(f"Unexpected error: {traceback.format_exc()}")
-        raise HTTPException(500, "Internal server error")

 from typing import Optional
 def interpret_natural_language(prompt: str, df_columns: list) -> Optional[VisualizationRequest]:
+    """Convert a natural-language prompt into visualization parameters.
+
+    The chart type is chosen by keyword matching and column roles are
+    inferred from phrases such as "by <col>", "of <col>" or "color by <col>".
+    Only column *names* are available here (no DataFrame is passed in), so
+    no dtype-based selection is attempted.
+
+    Args:
+        prompt: Free-text description of the desired chart.
+        df_columns: Column labels of the data to be visualized.
+
+    Returns:
+        A VisualizationRequest, or None when prompt or df_columns is empty.
+    """
     if not prompt or not df_columns:
         return None
+    # Matching below is case-insensitive, so normalize the prompt once
+    prompt = prompt.lower().strip()
+    # Default values
+    chart_type = "bar"
+    x_col = None
+    y_col = None
+    hue_col = None
+    title = f"Visualization of {prompt[:50]}"  # Default title
+    # Keywords per chart type; the first chart type with a match wins
+    chart_keywords = {
+        "line": ["line", "trend", "over time"],
+        "bar": ["bar", "compare", "comparison"],
+        "scatter": ["scatter", "correlation", "relationship"],
+        "histogram": ["histogram", "distribution", "frequency"],
+        "boxplot": ["box", "quartile", "distribution"],
+        "heatmap": ["heatmap", "correlation", "matrix"]
+    }
+    for chart, keywords in chart_keywords.items():
+        if any(keyword in prompt for keyword in keywords):
+            chart_type = chart
+            break
+    # Column role detection
+    for col in df_columns:
+        # str() keeps non-string column labels (e.g. ints) from raising
+        col_lower = str(col).lower()
+        if col_lower not in prompt:
+            continue
+        # Hue is tested first: "color by X" / "group by X" also contain
+        # "by X", which would otherwise be claimed by the x-axis rule.
+        if hue_col is None and ("color by " + col_lower in prompt or
+                                "group by " + col_lower in prompt):
+            hue_col = col
+        elif x_col is None and ("by " + col_lower in prompt or
+                                "for " + col_lower in prompt or
+                                "across " + col_lower in prompt):
+            x_col = col
+        elif y_col is None and ("of " + col_lower in prompt or
+                                "show " + col_lower in prompt or
+                                "plot " + col_lower in prompt):
+            y_col = col
+    # Fallback logic if columns were not detected
+    if x_col is None:
+        x_col = df_columns[0]  # First column as default x-axis
+    if y_col is None and len(df_columns) > 1:
+        # Previously this read dtypes from an undefined `df` (NameError).
+        # With names only, take the first column that is not the x-axis.
+        y_col = next((col for col in df_columns if col != x_col), None)
+    # A heatmap does not use individual axis/hue columns
+    if chart_type == "heatmap":
+        x_col = None
+        y_col = None
+        hue_col = None
+    return VisualizationRequest(
+        chart_type=chart_type,
+        x_column=x_col,
+        y_column=y_col,
+        hue_column=hue_col,
+        title=title,
+        style="seaborn-v0_8"
+    )
 # ===== DYNAMIC VISUALIZATION FUNCTIONS =====
 def read_any_excel(content: bytes) -> pd.DataFrame:
     style: str = Form("seaborn-v0_8")
 ):
     try:
+        # [Previous file handling code remains the same until after df is created]
         if not prompt.strip():
             prompt = generate_smart_prompt(df)
             logger.info(f"Auto-generated prompt: {prompt}")
+        # Enhanced visualization request interpretation with better error feedback
+        try:
+            vis_request = interpret_natural_language(prompt, df.columns.tolist())
+            if not vis_request:
+                raise ValueError("Could not interpret the visualization request")
+            # Validate the request against the actual data
+            if vis_request.x_column and vis_request.x_column not in df.columns:
+                raise ValueError(f"Column '{vis_request.x_column}' not found in data")
+            if vis_request.y_column and vis_request.y_column not in df.columns:
+                raise ValueError(f"Column '{vis_request.y_column}' not found in data")
+            if vis_request.hue_column and vis_request.hue_column not in df.columns:
+                raise ValueError(f"Column '{vis_request.hue_column}' not found in data")
+        except ValueError as e:
+            logger.error(f"Visualization interpretation failed: {str(e)}")
+            raise HTTPException(
+                status_code=400,
+                detail={
+                    "error": "Could not understand your visualization request",
+                    "message": str(e),
+                    "suggestions": [
+                        "Try being more specific (e.g., 'Show sales by region')",
+                        f"Available columns: {list(df.columns)}",
+                        "Supported chart types: line, bar, scatter, histogram, boxplot, heatmap"
+                    ],
+                    "your_prompt": prompt
+                }
+            )
         vis_request.style = style
+        # [Rest of your visualization code remains the same]
     except HTTPException as he:
         raise
     except Exception as e:
         logger.error(f"Unexpected error: {traceback.format_exc()}")
+        raise HTTPException(500, {
+            "error": "Internal server error",
+            "details": str(e)
+        })