Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -387,9 +387,7 @@ def generate_visualization_code(df: pd.DataFrame, request: VisualizationRequest)
|
|
| 387 |
])
|
| 388 |
|
| 389 |
return "\n".join(code_lines)
|
| 390 |
-
|
| 391 |
-
"""Convert natural language prompt to visualization parameters"""
|
| 392 |
-
prompt = prompt.lower()
|
| 393 |
|
| 394 |
# Determine chart type
|
| 395 |
chart_type = "bar"
|
|
@@ -432,65 +430,47 @@ def interpret_natural_language(prompt: str, df_columns: list) -> VisualizationRe
|
|
| 432 |
title="Generated from: " + prompt[:50] + ("..." if len(prompt) > 50 else ""),
|
| 433 |
style="seaborn-v0_8" # Updated default
|
| 434 |
)
|
| 435 |
-
|
|
|
|
|
|
|
| 436 |
"""Convert natural language prompt to visualization parameters"""
|
|
|
|
|
|
|
|
|
|
| 437 |
prompt = prompt.lower()
|
|
|
|
| 438 |
|
| 439 |
# ===== DYNAMIC VISUALIZATION FUNCTIONS =====
|
| 440 |
def read_any_excel(content: bytes) -> pd.DataFrame:
|
| 441 |
"""Read any Excel file with automatic type detection"""
|
| 442 |
try:
|
| 443 |
-
# First
|
| 444 |
-
df = pd.read_excel(
|
| 445 |
-
io.BytesIO(content),
|
| 446 |
-
engine='openpyxl',
|
| 447 |
-
nrows=10 # Only read first few rows for detection
|
| 448 |
-
)
|
| 449 |
-
|
| 450 |
-
# Identify likely datetime columns
|
| 451 |
-
date_cols = [
|
| 452 |
-
col for col in df.columns
|
| 453 |
-
if df[col].dtype == 'object' and
|
| 454 |
-
any(is_date_like(str(x)) for x in df[col].head() if pd.notna(x))
|
| 455 |
-
]
|
| 456 |
-
|
| 457 |
-
# Full read with proper typing
|
| 458 |
df = pd.read_excel(
|
| 459 |
io.BytesIO(content),
|
| 460 |
engine='openpyxl',
|
| 461 |
-
parse_dates=date_cols,
|
| 462 |
-
date_parser=lambda x: pd.to_datetime(x, errors='coerce'),
|
| 463 |
dtype=object, # Read everything as object initially
|
| 464 |
na_values=['', '#N/A', '#VALUE!', '#REF!', 'NULL', 'NA', 'N/A']
|
| 465 |
)
|
| 466 |
|
| 467 |
# Convert each column to best possible type
|
| 468 |
for col in df.columns:
|
| 469 |
-
#
|
| 470 |
-
|
|
|
|
| 471 |
continue
|
|
|
|
|
|
|
| 472 |
|
| 473 |
-
#
|
| 474 |
try:
|
| 475 |
-
df[col] = pd.
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
except:
|
| 479 |
pass
|
| 480 |
|
| 481 |
-
# Then try datetime again (in case missed earlier)
|
| 482 |
-
if df[col].dtype == 'object':
|
| 483 |
-
try:
|
| 484 |
-
df[col] = pd.to_datetime(df[col], errors='ignore')
|
| 485 |
-
if pd.api.types.is_datetime64_any_dtype(df[col]):
|
| 486 |
-
continue
|
| 487 |
-
except:
|
| 488 |
-
pass
|
| 489 |
-
|
| 490 |
# Finally clean strings
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
df[col] = df[col].replace(['nan', 'None', 'NaT', ''], None)
|
| 494 |
|
| 495 |
return df
|
| 496 |
|
|
@@ -498,6 +478,13 @@ def read_any_excel(content: bytes) -> pd.DataFrame:
|
|
| 498 |
logger.error(f"Excel reading failed: {str(e)}")
|
| 499 |
raise HTTPException(422, f"Could not process Excel file: {str(e)}")
|
| 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
def is_date_like(s: str) -> bool:
|
| 502 |
"""Helper to detect date-like strings"""
|
| 503 |
date_patterns = [
|
|
@@ -804,14 +791,16 @@ async def visualize_with_natural_language(
|
|
| 804 |
if not prompt.strip():
|
| 805 |
prompt = generate_smart_prompt(df)
|
| 806 |
|
| 807 |
-
# Generate visualization
|
| 808 |
-
vis_request = interpret_natural_language(prompt, df.columns)
|
|
|
|
|
|
|
|
|
|
| 809 |
vis_request.style = style
|
| 810 |
|
| 811 |
-
# Generate
|
| 812 |
visualization_code = generate_dynamic_visualization_code(df, vis_request)
|
| 813 |
|
| 814 |
-
# [Rest of your existing visualization execution code...]
|
| 815 |
# Create the plot in memory
|
| 816 |
plt.style.use(vis_request.style)
|
| 817 |
fig, ax = plt.subplots(figsize=(10, 6))
|
|
@@ -836,6 +825,18 @@ async def visualize_with_natural_language(
|
|
| 836 |
"interpreted_parameters": vis_request.dict()
|
| 837 |
}
|
| 838 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 839 |
except HTTPException:
|
| 840 |
raise
|
| 841 |
except Exception as e:
|
|
|
|
| 387 |
])
|
| 388 |
|
| 389 |
return "\n".join(code_lines)
|
| 390 |
+
|
|
|
|
|
|
|
| 391 |
|
| 392 |
# Determine chart type
|
| 393 |
chart_type = "bar"
|
|
|
|
| 430 |
title="Generated from: " + prompt[:50] + ("..." if len(prompt) > 50 else ""),
|
| 431 |
style="seaborn-v0_8" # Updated default
|
| 432 |
)
|
| 433 |
+
from typing import Optional
|
| 434 |
+
|
| 435 |
+
def interpret_natural_language(prompt: str, df_columns: list) -> Optional[VisualizationRequest]:
|
| 436 |
"""Convert natural language prompt to visualization parameters"""
|
| 437 |
+
if not prompt or not df_columns:
|
| 438 |
+
return None
|
| 439 |
+
|
| 440 |
prompt = prompt.lower()
|
| 441 |
+
# [rest of your existing function...]
|
| 442 |
|
| 443 |
# ===== DYNAMIC VISUALIZATION FUNCTIONS =====
|
| 444 |
def read_any_excel(content: bytes) -> pd.DataFrame:
|
| 445 |
"""Read any Excel file with automatic type detection"""
|
| 446 |
try:
|
| 447 |
+
# First read without parsing dates to detect datetime columns
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
df = pd.read_excel(
|
| 449 |
io.BytesIO(content),
|
| 450 |
engine='openpyxl',
|
|
|
|
|
|
|
| 451 |
dtype=object, # Read everything as object initially
|
| 452 |
na_values=['', '#N/A', '#VALUE!', '#REF!', 'NULL', 'NA', 'N/A']
|
| 453 |
)
|
| 454 |
|
| 455 |
# Convert each column to best possible type
|
| 456 |
for col in df.columns:
|
| 457 |
+
# First try numeric conversion
|
| 458 |
+
try:
|
| 459 |
+
df[col] = pd.to_numeric(df[col])
|
| 460 |
continue
|
| 461 |
+
except (ValueError, TypeError):
|
| 462 |
+
pass
|
| 463 |
|
| 464 |
+
# Then try datetime with explicit format
|
| 465 |
try:
|
| 466 |
+
df[col] = pd.to_datetime(df[col], format='mixed')
|
| 467 |
+
continue
|
| 468 |
+
except (ValueError, TypeError):
|
|
|
|
| 469 |
pass
|
| 470 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
# Finally clean strings
|
| 472 |
+
df[col] = df[col].astype(str).str.strip()
|
| 473 |
+
df[col] = df[col].replace(['nan', 'None', 'NaT', ''], None)
|
|
|
|
| 474 |
|
| 475 |
return df
|
| 476 |
|
|
|
|
| 478 |
logger.error(f"Excel reading failed: {str(e)}")
|
| 479 |
raise HTTPException(422, f"Could not process Excel file: {str(e)}")
|
| 480 |
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
except Exception as e:
|
| 485 |
+
logger.error(f"Excel reading failed: {str(e)}")
|
| 486 |
+
raise HTTPException(422, f"Could not process Excel file: {str(e)}")
|
| 487 |
+
|
| 488 |
def is_date_like(s: str) -> bool:
|
| 489 |
"""Helper to detect date-like strings"""
|
| 490 |
date_patterns = [
|
|
|
|
| 791 |
if not prompt.strip():
|
| 792 |
prompt = generate_smart_prompt(df)
|
| 793 |
|
| 794 |
+
# Generate visualization request
|
| 795 |
+
vis_request = interpret_natural_language(prompt, df.columns.tolist())
|
| 796 |
+
if not vis_request:
|
| 797 |
+
raise HTTPException(400, "Could not interpret visualization request from prompt")
|
| 798 |
+
|
| 799 |
vis_request.style = style
|
| 800 |
|
| 801 |
+
# Generate the visualization code
|
| 802 |
visualization_code = generate_dynamic_visualization_code(df, vis_request)
|
| 803 |
|
|
|
|
| 804 |
# Create the plot in memory
|
| 805 |
plt.style.use(vis_request.style)
|
| 806 |
fig, ax = plt.subplots(figsize=(10, 6))
|
|
|
|
| 825 |
"interpreted_parameters": vis_request.dict()
|
| 826 |
}
|
| 827 |
|
| 828 |
+
except HTTPException:
|
| 829 |
+
raise
|
| 830 |
+
except Exception as e:
|
| 831 |
+
logger.error(f"Natural language visualization failed: {str(e)}\n{traceback.format_exc()}")
|
| 832 |
+
raise HTTPException(500, detail=f"Visualization failed: {str(e)}")
|
| 833 |
+
return {
|
| 834 |
+
"status": "success",
|
| 835 |
+
"image_data": image_base64,
|
| 836 |
+
"code": visualization_code,
|
| 837 |
+
"interpreted_parameters": vis_request.dict()
|
| 838 |
+
}
|
| 839 |
+
|
| 840 |
except HTTPException:
|
| 841 |
raise
|
| 842 |
except Exception as e:
|