chenguittiMaroua committed on
Commit
629bda0
·
verified ·
1 Parent(s): 4465d9c

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +45 -44
main.py CHANGED
@@ -387,9 +387,7 @@ def generate_visualization_code(df: pd.DataFrame, request: VisualizationRequest)
387
  ])
388
 
389
  return "\n".join(code_lines)
390
- def interpret_natural_language(prompt: str, df_columns: list) -> VisualizationRequest:
391
- """Convert natural language prompt to visualization parameters"""
392
- prompt = prompt.lower()
393
 
394
  # Determine chart type
395
  chart_type = "bar"
@@ -432,65 +430,47 @@ def interpret_natural_language(prompt: str, df_columns: list) -> VisualizationRe
432
  title="Generated from: " + prompt[:50] + ("..." if len(prompt) > 50 else ""),
433
  style="seaborn-v0_8" # Updated default
434
  )
435
- def interpret_natural_language(prompt: str, df_columns: list) -> VisualizationRequest:
 
 
436
  """Convert natural language prompt to visualization parameters"""
 
 
 
437
  prompt = prompt.lower()
 
438
 
439
  # ===== DYNAMIC VISUALIZATION FUNCTIONS =====
440
  def read_any_excel(content: bytes) -> pd.DataFrame:
441
  """Read any Excel file with automatic type detection"""
442
  try:
443
- # First pass to detect datetime columns
444
- df = pd.read_excel(
445
- io.BytesIO(content),
446
- engine='openpyxl',
447
- nrows=10 # Only read first few rows for detection
448
- )
449
-
450
- # Identify likely datetime columns
451
- date_cols = [
452
- col for col in df.columns
453
- if df[col].dtype == 'object' and
454
- any(is_date_like(str(x)) for x in df[col].head() if pd.notna(x))
455
- ]
456
-
457
- # Full read with proper typing
458
  df = pd.read_excel(
459
  io.BytesIO(content),
460
  engine='openpyxl',
461
- parse_dates=date_cols,
462
- date_parser=lambda x: pd.to_datetime(x, errors='coerce'),
463
  dtype=object, # Read everything as object initially
464
  na_values=['', '#N/A', '#VALUE!', '#REF!', 'NULL', 'NA', 'N/A']
465
  )
466
 
467
  # Convert each column to best possible type
468
  for col in df.columns:
469
- # Skip if already datetime
470
- if pd.api.types.is_datetime64_any_dtype(df[col]):
 
471
  continue
 
 
472
 
473
- # Try numeric first
474
  try:
475
- df[col] = pd.to_numeric(df[col], errors='ignore')
476
- if df[col].dtype.kind in 'biufc': # Is numeric type
477
- continue
478
- except:
479
  pass
480
 
481
- # Then try datetime again (in case missed earlier)
482
- if df[col].dtype == 'object':
483
- try:
484
- df[col] = pd.to_datetime(df[col], errors='ignore')
485
- if pd.api.types.is_datetime64_any_dtype(df[col]):
486
- continue
487
- except:
488
- pass
489
-
490
  # Finally clean strings
491
- if df[col].dtype == 'object':
492
- df[col] = df[col].astype(str).str.strip()
493
- df[col] = df[col].replace(['nan', 'None', 'NaT', ''], None)
494
 
495
  return df
496
 
@@ -498,6 +478,13 @@ def read_any_excel(content: bytes) -> pd.DataFrame:
498
  logger.error(f"Excel reading failed: {str(e)}")
499
  raise HTTPException(422, f"Could not process Excel file: {str(e)}")
500
 
 
 
 
 
 
 
 
501
  def is_date_like(s: str) -> bool:
502
  """Helper to detect date-like strings"""
503
  date_patterns = [
@@ -804,14 +791,16 @@ async def visualize_with_natural_language(
804
  if not prompt.strip():
805
  prompt = generate_smart_prompt(df)
806
 
807
- # Generate visualization
808
- vis_request = interpret_natural_language(prompt, df.columns)
 
 
 
809
  vis_request.style = style
810
 
811
- # Generate and return visualization
812
  visualization_code = generate_dynamic_visualization_code(df, vis_request)
813
 
814
- # [Rest of your existing visualization execution code...]
815
  # Create the plot in memory
816
  plt.style.use(vis_request.style)
817
  fig, ax = plt.subplots(figsize=(10, 6))
@@ -836,6 +825,18 @@ async def visualize_with_natural_language(
836
  "interpreted_parameters": vis_request.dict()
837
  }
838
 
 
 
 
 
 
 
 
 
 
 
 
 
839
  except HTTPException:
840
  raise
841
  except Exception as e:
 
387
  ])
388
 
389
  return "\n".join(code_lines)
390
+
 
 
391
 
392
  # Determine chart type
393
  chart_type = "bar"
 
430
  title="Generated from: " + prompt[:50] + ("..." if len(prompt) > 50 else ""),
431
  style="seaborn-v0_8" # Updated default
432
  )
433
+ from typing import Optional
434
+
435
+ def interpret_natural_language(prompt: str, df_columns: list) -> Optional[VisualizationRequest]:
436
  """Convert natural language prompt to visualization parameters"""
437
+ if not prompt or not df_columns:
438
+ return None
439
+
440
  prompt = prompt.lower()
441
+ # [rest of your existing function...]
442
 
443
  # ===== DYNAMIC VISUALIZATION FUNCTIONS =====
444
  def read_any_excel(content: bytes) -> pd.DataFrame:
445
  """Read any Excel file with automatic type detection"""
446
  try:
447
+ # Read every cell as object; column types are inferred in the loop below
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  df = pd.read_excel(
449
  io.BytesIO(content),
450
  engine='openpyxl',
 
 
451
  dtype=object, # Read everything as object initially
452
  na_values=['', '#N/A', '#VALUE!', '#REF!', 'NULL', 'NA', 'N/A']
453
  )
454
 
455
  # Convert each column to best possible type
456
  for col in df.columns:
457
+ # First try numeric conversion
458
+ try:
459
+ df[col] = pd.to_numeric(df[col])
460
  continue
461
+ except (ValueError, TypeError):
462
+ pass
463
 
464
+ # Then try datetime, letting pandas infer the format per element (format='mixed')
465
  try:
466
+ df[col] = pd.to_datetime(df[col], format='mixed')
467
+ continue
468
+ except (ValueError, TypeError):
 
469
  pass
470
 
 
 
 
 
 
 
 
 
 
471
  # Finally clean strings
472
+ df[col] = df[col].astype(str).str.strip()
473
+ df[col] = df[col].replace(['nan', 'None', 'NaT', ''], None)
 
474
 
475
  return df
476
 
 
478
  logger.error(f"Excel reading failed: {str(e)}")
479
  raise HTTPException(422, f"Could not process Excel file: {str(e)}")
480
 
481
+
482
+
483
+
484
+ except Exception as e:
485
+ logger.error(f"Excel reading failed: {str(e)}")
486
+ raise HTTPException(422, f"Could not process Excel file: {str(e)}")
487
+
488
  def is_date_like(s: str) -> bool:
489
  """Helper to detect date-like strings"""
490
  date_patterns = [
 
791
  if not prompt.strip():
792
  prompt = generate_smart_prompt(df)
793
 
794
+ # Generate visualization request
795
+ vis_request = interpret_natural_language(prompt, df.columns.tolist())
796
+ if not vis_request:
797
+ raise HTTPException(400, "Could not interpret visualization request from prompt")
798
+
799
  vis_request.style = style
800
 
801
+ # Generate the visualization code
802
  visualization_code = generate_dynamic_visualization_code(df, vis_request)
803
 
 
804
  # Create the plot in memory
805
  plt.style.use(vis_request.style)
806
  fig, ax = plt.subplots(figsize=(10, 6))
 
825
  "interpreted_parameters": vis_request.dict()
826
  }
827
 
828
+ except HTTPException:
829
+ raise
830
+ except Exception as e:
831
+ logger.error(f"Natural language visualization failed: {str(e)}\n{traceback.format_exc()}")
832
+ raise HTTPException(500, detail=f"Visualization failed: {str(e)}")
833
+ return {
834
+ "status": "success",
835
+ "image_data": image_base64,
836
+ "code": visualization_code,
837
+ "interpreted_parameters": vis_request.dict()
838
+ }
839
+
840
  except HTTPException:
841
  raise
842
  except Exception as e: