destinyebuka committed
Commit 9f3c354 · 1 Parent(s): 366f758
app/ai/nodes/draft_node.py CHANGED
@@ -1,18 +1,20 @@
- # app/ai/nodes/draft_node.py – FIXED: Don't regenerate when preview is active + error handling
  import datetime
  from typing import Dict
- from bson import ObjectId
  from app.database import get_db
- from app.ai.state import ListingDraft
- from structlog import get_logger
- from app.config import settings
  from app.ml.models.ml_listing_extractor import get_ml_extractor

- logger = get_logger(__name__)

  ml_extractor = get_ml_extractor()

- # ========== AMENITY ICONS MAPPING ==========
  AMENITY_ICONS = {
      "wifi": "📶",
      "parking": "🅿️",
@@ -31,16 +33,22 @@ AMENITY_ICONS = {
      "television": "📺",
  }

- # ========== CUSTOM EXCEPTIONS ==========
  class ValidationError(Exception):
-     """Raised when ML validation fails."""
      pass

  class DraftGenerationError(Exception):
      """Raised when draft generation fails."""
      pass

- # ========== HELPERS ==========
  def _add_amenity_icons(amenities: list) -> str:
      """Convert amenities list to string with icons."""
      if not amenities:
@@ -48,7 +56,7 @@ def _add_amenity_icons(amenities: list) -> str:
      icons_text = []
      for amenity in amenities:
          amenity_lower = amenity.lower().strip()
-         icon = AMENITY_ICONS.get(amenity_lower, "✅")
          icons_text.append(f"{icon} {amenity.title()}")
      return " | ".join(icons_text)

@@ -91,117 +99,148 @@ def _get_current_message(state: Dict) -> str:
          return full.split("Now the user says:")[-1].strip()
      return full.strip()

- # ---------- node ----------
  async def draft_node(state: Dict) -> Dict:
      """
-     LangGraph node:
-     - NEVER regenerate if preview is already shown
-     - Only run if status is exactly "draft_ready"
-     - After preview shown, let intent_node handle all commands
-     - ✅ Comprehensive error handling + logging
-     """
-     status = state.get("status")
-
-     # ===== CRITICAL: Exit immediately if preview already active =====
-     if status in {"preview_shown", "waiting_for_images"}:
-         logger.info("🛑 DRAFT NODE SKIPPED: Preview already active, not regenerating")
-         return state
-
-     # ===== Only proceed if status is EXACTLY "draft_ready" =====
-     if status != "draft_ready":
-         logger.info(f"🛑 DRAFT NODE SKIPPED: status={status}, not draft_ready")
-         return state

-     # ===== Only proceed if intent is "list" =====
-     if state.get("intent") != "list":
-         logger.info("🛑 DRAFT NODE SKIPPED: intent is not 'list'")
-         return state

-     # ===== If we get here, generate the draft =====
-     logger.info("✅ DRAFT NODE RUNNING: Generating draft preview")

-     user_id = state.get("user_id")
-
-     # ===== ML VALIDATION =====
-     try:
-         validation = ml_extractor.validate_all_fields(state, user_id)
-         if not validation["all_valid"]:
-             issues_text = "\n".join([f"❌ {issue}" for issue in validation["issues"]])
-             state["ai_reply"] = f"""I found some issues with your listing:

  {issues_text}

  Let me ask again - could you clarify these fields?"""
-             state["status"] = "collecting"
-             state["missing_fields"] = [
-                 field for field, result in validation["field_validations"].items()
-                 if not result["is_valid"]
-             ]
-             logger.warning("🚫 Fields failed ML validation", issues=validation["issues"])
-             return state
-         logger.info("✅ All fields passed ML validation", user_id=user_id)
-     except Exception as e:
-         logger.error("❌ ML validation error", exc_info=e)
-         state["ai_reply"] = "Sorry, I couldn't validate your listing. Please try again."
-         state["status"] = "error"
-         return state
-
-     # ===== DRAFT GENERATION =====
-     try:
-         # Generate title / description / icons
-         title = _generate_title(state)
-         description = _generate_description(state)
-         amenities_with_icons = _add_amenity_icons(state.get("amenities", []))
-         images = state.get("draft", {}).get("images", []) if isinstance(state.get("draft"), dict) else []
-
-         draft_preview = {
-             "title": title,
-             "description": description,
-             "location": state.get("location", "").title(),
-             "bedrooms": state.get("bedrooms"),
-             "bathrooms": state.get("bathrooms"),
-             "price": state.get("price"),
-             "price_type": state.get("price_type"),
-             "listing_type": state.get("listing_type"),
-             "amenities": state.get("amenities", []),
-             "amenities_with_icons": amenities_with_icons,
-             "requirements": state.get("requirements"),
-             "currency": state.get("currency", "XOF"),
-             "images": images,
-             "field_confidences": validation["field_validations"],
-         }
-
-         logger.info("🎯 Draft preview generated",
-                     title=title,
-                     location=state.get("location"),
-                     image_count=len(images),
-                     amenities=state.get("amenities", []))
-
-     except Exception as e:
-         logger.error("❌ Failed to generate draft preview", exc_info=e)
-         state["ai_reply"] = "Sorry, I couldn't generate your draft. Please try again."
-         state["status"] = "error"
-         return state
-
-     # ===== BUILD PREVIEW MESSAGE =====
-     try:
-         images_section = ""
-         if images:
-             images_section = f"\n📷 Images: {len(images)} uploaded\n"
-             for idx, img_url in enumerate(images[:3], 1):
-                 images_section += f"  {idx}. {img_url[:60]}...\n"
-             if len(images) > 3:
-                 images_section += f"  ... and {len(images) - 3} more\n"
-
-         preview_text = f"""
- ┌────────────────────────────────────────────┐
-   🏘 LISTING PREVIEW
- └────────────────────────────────────────────┘

  **{draft_preview['title']}**

  📍 Location: {draft_preview['location']}
- 🛏 Bedrooms: {draft_preview['bedrooms']}
  🚿 Bathrooms: {draft_preview['bathrooms']}
  💰 Price: {draft_preview['price']:,} {draft_preview['price_type']} ({draft_preview['currency']})

@@ -209,30 +248,36 @@ Let me ask again - could you clarify these fields?"""

  ✨ Amenities: {draft_preview['amenities_with_icons'] if draft_preview['amenities_with_icons'] else 'None specified'}
  {images_section}
- └────────────────────────────────────────────┘
  """
-
-         if not images:
-             preview_text += """
  📸 Upload property images to make your listing more attractive!

  Then say **publish** to make it live!
  """
-             state["status"] = "waiting_for_images"
-         else:
-             preview_text += """
  ✅ Perfect! Say **publish** to make your listing live!
  """
-             state["status"] = "preview_shown"
-
-         state["draft_preview"] = draft_preview
-         state["ai_reply"] = preview_text
-
-         logger.info("✅ Draft node DONE: status set to preview_shown or waiting_for_images")
-         return state
-
-     except Exception as e:
-         logger.error("❌ Failed to build preview message", exc_info=e)
-         state["ai_reply"] = "Sorry, I encountered an error preparing your listing. Please try again."
-         state["status"] = "error"
-         return state
+ # app/ai/nodes/draft_node.py – FINAL: Error handling + validation + observability
+ import logging
  import datetime
  from typing import Dict
+
  from app.database import get_db
+ from app.core.error_handling import trace_operation, handle_errors
  from app.ml.models.ml_listing_extractor import get_ml_extractor

+ logger = logging.getLogger(__name__)

  ml_extractor = get_ml_extractor()

+ # ============================================================
+ # Amenity Icons Mapping
+ # ============================================================
+
  AMENITY_ICONS = {
      "wifi": "📶",
      "parking": "🅿️",
  ...
      "television": "📺",
  }

+ # ============================================================
+ # Custom Exceptions
+ # ============================================================
+
  class ValidationError(Exception):
+     """Raised when field validation fails."""
      pass

  class DraftGenerationError(Exception):
      """Raised when draft generation fails."""
      pass

+ # ============================================================
+ # Helpers
+ # ============================================================
+
  def _add_amenity_icons(amenities: list) -> str:
      """Convert amenities list to string with icons."""
      if not amenities:
  ...
      icons_text = []
      for amenity in amenities:
          amenity_lower = amenity.lower().strip()
+         icon = AMENITY_ICONS.get(amenity_lower, "✔")
          icons_text.append(f"{icon} {amenity.title()}")
      return " | ".join(icons_text)

  ...
          return full.split("Now the user says:")[-1].strip()
      return full.strip()

+ # ============================================================
+ # Draft Node
+ # ============================================================
+
+ @handle_errors(default_return=None)
  async def draft_node(state: Dict) -> Dict:
      """
+     LangGraph node: Generate draft listing preview

+     Features:
+     - Prevents regeneration when preview active
+     - ML validation of all fields
+     - Professional title/description generation
+     - Amenity icon formatting
+     - Error handling with graceful fallback
+     - Full observability and logging
+     """

+     status = state.get("status")

+     with trace_operation(
+         "draft_node",
+         {
+             "status": status,
+             "intent": state.get("intent"),
+             "has_draft": state.get("draft_preview") is not None,
+         }
+     ):
+         # ===== CRITICAL: Exit immediately if preview already active =====
+         if status in {"preview_shown", "waiting_for_images"}:
+             logger.info("🛑 DRAFT NODE SKIPPED: Preview already active")
+             return state
+
+         # ===== Only proceed if status is EXACTLY "draft_ready" =====
+         if status != "draft_ready":
+             logger.info(f"🛑 DRAFT NODE SKIPPED: status={status}, not draft_ready")
+             return state
+
+         # ===== Only proceed if intent is "list" =====
+         if state.get("intent") != "list":
+             logger.info("🛑 DRAFT NODE SKIPPED: intent is not 'list'")
+             return state
+
+         # ===== If we get here, generate the draft =====
+         logger.info("✅ DRAFT NODE RUNNING: Generating draft preview")
+
+         user_id = state.get("user_id")
+
+         # ===== ML VALIDATION =====
+         with trace_operation("ml_validation"):
+             try:
+                 validation = ml_extractor.validate_all_fields(state, user_id)
+
+                 if not validation["all_valid"]:
+                     issues_text = "\n".join([f"❌ {issue}" for issue in validation["issues"]])
+                     state["ai_reply"] = f"""I found some issues with your listing:

  {issues_text}

  Let me ask again - could you clarify these fields?"""
+                     state["status"] = "collecting"
+                     state["missing_fields"] = [
+                         field for field, result in validation["field_validations"].items()
+                         if not result["is_valid"]
+                     ]
+                     logger.warning(f"🚫 Validation failed", extra={"issues": validation["issues"]})
+                     return state
+
+                 logger.info(f"✅ All fields passed validation")
+
+             except Exception as e:
+                 logger.error(f"❌ ML validation error: {e}", exc_info=True)
+                 state["ai_reply"] = "Sorry, I couldn't validate your listing. Please try again."
+                 state["status"] = "error"
+                 return state
+
+         # ===== DRAFT GENERATION =====
+         with trace_operation("draft_generation"):
+             try:
+                 # Generate components
+                 title = _generate_title(state)
+                 description = _generate_description(state)
+                 amenities_with_icons = _add_amenity_icons(state.get("amenities", []))
+                 images = (
+                     state.get("draft", {}).get("images", [])
+                     if isinstance(state.get("draft"), dict)
+                     else []
+                 )
+
+                 # Build draft preview
+                 draft_preview = {
+                     "title": title,
+                     "description": description,
+                     "location": state.get("location", "").title(),
+                     "bedrooms": state.get("bedrooms"),
+                     "bathrooms": state.get("bathrooms"),
+                     "price": state.get("price"),
+                     "price_type": state.get("price_type"),
+                     "listing_type": state.get("listing_type"),
+                     "amenities": state.get("amenities", []),
+                     "amenities_with_icons": amenities_with_icons,
+                     "requirements": state.get("requirements"),
+                     "currency": state.get("currency", "XOF"),
+                     "images": images,
+                     "field_confidences": validation.get("field_validations", {}),
+                 }
+
+                 logger.info(
+                     f"🎯 Draft generated",
+                     extra={
+                         "title": title,
+                         "images": len(images),
+                         "amenities": len(state.get("amenities", [])),
+                     }
+                 )
+
+             except Exception as e:
+                 logger.error(f"❌ Failed to generate draft: {e}", exc_info=True)
+                 state["ai_reply"] = "Sorry, I couldn't generate your draft. Please try again."
+                 state["status"] = "error"
+                 return state
+
+         # ===== BUILD PREVIEW MESSAGE =====
+         with trace_operation("build_preview_message"):
+             try:
+                 images_section = ""
+                 if images:
+                     images_section = f"\n📷 Images: {len(images)} uploaded\n"
+                     for idx, img_url in enumerate(images[:3], 1):
+                         images_section += f"  {idx}. {img_url[:60]}...\n"
+                     if len(images) > 3:
+                         images_section += f"  ... and {len(images) - 3} more\n"
+
+                 preview_text = f"""
+ ┌────────────────────────────────────────────┐
+   🏠 LISTING PREVIEW
+ └────────────────────────────────────────────┘

  **{draft_preview['title']}**

  📍 Location: {draft_preview['location']}
+ 🛏 Bedrooms: {draft_preview['bedrooms']}
  🚿 Bathrooms: {draft_preview['bathrooms']}
  💰 Price: {draft_preview['price']:,} {draft_preview['price_type']} ({draft_preview['currency']})

  ...

  ✨ Amenities: {draft_preview['amenities_with_icons'] if draft_preview['amenities_with_icons'] else 'None specified'}
  {images_section}
+ └────────────────────────────────────────────┘
  """
+
+                 if not images:
+                     preview_text += """
  📸 Upload property images to make your listing more attractive!

  Then say **publish** to make it live!
  """
+                     state["status"] = "waiting_for_images"
+                     logger.info("⏳ Waiting for images")
+                 else:
+                     preview_text += """
  ✅ Perfect! Say **publish** to make your listing live!
  """
+                     state["status"] = "preview_shown"
+                     logger.info("✅ Preview ready for publishing")
+
+                 state["draft_preview"] = draft_preview
+                 state["ai_reply"] = preview_text
+
+                 logger.info(
+                     f"✅ Draft node completed",
+                     extra={"status": state["status"]}
+                 )
+
+                 return state
+
+             except Exception as e:
+                 logger.error(f"❌ Failed to build preview: {e}", exc_info=True)
+                 state["ai_reply"] = "Sorry, an error occurred preparing your listing. Please try again."
+                 state["status"] = "error"
+                 return state
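Note: the rewritten draft_node leans on trace_operation and handle_errors from app.core.error_handling, a module that is not part of this commit. A minimal sketch of compatible helpers, inferred only from the call sites above (names and behavior are assumptions, not the real implementation):

import functools
import logging
import time
from contextlib import contextmanager

logger = logging.getLogger(__name__)

@contextmanager
def trace_operation(name, attributes=None):
    """Log entry, exit, and wall-clock duration of a named operation."""
    start = time.perf_counter()
    logger.info("start %s", name, extra={"attributes": attributes or {}})
    try:
        yield
    finally:
        logger.info("end %s (%.1f ms)", name, (time.perf_counter() - start) * 1000)

def handle_errors(default_return=None):
    """Decorator for async functions: log unhandled exceptions, return a fallback."""
    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except Exception:
                logger.exception("unhandled error in %s", func.__name__)
                return default_return
        return wrapper
    return decorator

One caveat with default_return=None: if draft_node itself raises, the graph receives None instead of the state dict, so downstream nodes must tolerate a missing state.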
app/ai/nodes/intent_node.py CHANGED
@@ -1,12 +1,19 @@
- # app/ai/nodes/intent_node.py – FIXED: Detect commands FIRST when preview is active
  import json
  import re
  from typing import Dict, List
  from tenacity import retry, stop_after_attempt, wait_exponential
- from structlog import get_logger
  from app.core.llm_router import call_llm_smart
  from app.core.context_manager import get_context_manager
- from app.config import settings
  from app.ml.models.ml_listing_extractor import get_ml_extractor
  from app.ai.nodes.draft_node import (
      _generate_title,
@@ -14,21 +21,30 @@ from app.ai.nodes.draft_node import (
      _add_amenity_icons,
  )

- logger = get_logger(__name__)

  MAX_TOKENS = 600
  TEMP = 0

  ml_extractor = get_ml_extractor()

- # ---------- helpers ----------
  def _load_system() -> str:
-     with open("app/ai/prompts/system_prompt.txt", encoding="utf-8") as f:
-         return f.read()

  SYSTEM_PROMPT = _load_system()

  def _clean_json(raw: str) -> str:
      cleaned = re.sub(r'```json\s*', '', raw)
      cleaned = re.sub(r'```\s*', '', cleaned)
      return cleaned.strip()
@@ -41,27 +57,25 @@ def _get_current_message(state: Dict) -> str:
      return full.strip()

  def _normalize_locations(location: str) -> str:
      if not location:
          return None
      loc_lower = location.lower().strip()
      location_map = {
-         "lago": "lagos", "lgs": "lagos", "lag": "lagos",
-         "cotnu": "cotonou", "cotonus": "cotonou", "cotou": "cotonou",
-         "akpakpa": "akpakpa", "nairobi": "nairobi", "nbi": "nairobi",
-         "accra": "accra", "acc": "accra", "joburg": "johannesburg",
-         "jozi": "johannesburg", "london": "london", "paris": "paris",
      }
      return location_map.get(loc_lower, location.lower())

  def _normalize_amenities(amenities: list) -> list:
      if not amenities:
          return []
      amenity_map = {
-         "balcno": "balcony", "balconny": "balcony", "parkng": "parking",
-         "park": "parking", "furnisd": "furnished", "furnishd": "furnished",
-         "furnish": "furnished", "ac": "air conditioning", "air cond": "air conditioning",
-         "aircond": "air conditioning", "swiming": "pool", "kitchn": "kitchen",
-         "gdn": "garden",
      }
      normalized = []
      for amenity in amenities:
@@ -74,32 +88,29 @@ def _normalize_amenities(amenities: list) -> list:
      return normalized

  def _normalize_price_type(price_type: str) -> str:
      if not price_type:
          return None
      pt_lower = price_type.lower().strip()
      price_type_map = {
-         "montly": "monthly", "monthyl": "monthly", "mth": "monthly", "month": "monthly",
-         "nightl": "nightly", "night": "nightly", "daily": "daily", "day": "daily",
-         "weakly": "weekly", "weakyl": "weekly", "week": "weekly",
-         "yr": "yearly", "year": "yearly", "annum": "yearly",
      }
      return price_type_map.get(pt_lower, pt_lower)

  def _normalize_listing_type(listing_type: str) -> str:
      if not listing_type:
          return None
      lt_lower = listing_type.lower().strip()
      listing_type_map = {
-         "for rent": "rent", "rental": "rent",
-         "short stay": "short-stay", "short-stay": "short-stay", "shortsta": "short-stay",
-         "short stya": "short-stay", "stayover": "short-stay",
-         "roommate": "roommate", "roommat": "roommate", "sharing": "roommate",
-         "flatmate": "roommate", "shareflat": "roommate",
-         "for sale": "sale", "selling": "sale", "sell": "sale",
      }
      return listing_type_map.get(lt_lower, lt_lower)

  def _get_missing_fields(data: Dict) -> List[str]:
      if data.get("intent") != "list":
          return []
      required = ["location", "bedrooms", "bathrooms", "price", "listing_type", "price_type"]
@@ -111,6 +122,7 @@ def _get_missing_fields(data: Dict) -> List[str]:
      return missing

  def _get_next_question(missing_fields: List[str]) -> str:
      if not missing_fields:
          return None
      next_field = missing_fields[0]
@@ -125,263 +137,246 @@ def _get_next_question(missing_fields: List[str]) -> str:
      return questions.get(next_field, "What else should I know?")

  def _build_draft_preview(data: dict) -> dict:
-     """Return the same dict draft_node puts in state['draft_preview']."""
-     title = _generate_title(data)
      description = _generate_description(data)
-     icons = _add_amenity_icons(data.get("amenities", []))
-     images = data.get("draft", {}).get("images", []) if isinstance(data.get("draft"), dict) else []

      return {
-         "title" : title,
-         "description" : description,
-         "location" : data.get("location", "").title(),
-         "bedrooms" : data.get("bedrooms"),
-         "bathrooms" : data.get("bathrooms"),
-         "price" : data.get("price"),
-         "price_type" : data.get("price_type"),
-         "listing_type" : data.get("listing_type"),
-         "amenities" : data.get("amenities", []),
-         "amenities_with_icons" : icons,
-         "requirements" : data.get("requirements"),
-         "currency" : data.get("currency", "XOF"),
-         "images" : images,
-         "field_confidences" : data.get("field_validations", {}),
      }

- # ---------- node ----------
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=5))
  async def intent_node(state: Dict) -> Dict:
      """
-     LangGraph node:
-     - FIRST: Check if preview is active and detect COMMANDS
-     - THEN: Do normal intent extraction for new listings/searches
      """
      current_msg = _get_current_message(state).lower()
      status = state.get("status")

-     # ===== CRITICAL: Handle commands FIRST when preview is active =====
-     if status in {"preview_shown", "waiting_for_images"} and state.get("draft_preview"):
-         logger.info(f"🎯 COMMAND DETECTION MODE: status={status}")
-
-         # PUBLISH command
-         if any(w in current_msg for w in {"publish", "go live", "post it", "list it", "confirm", "yes", "ok", "okay"}):
-             logger.info("📤 COMMAND: publish – routing to publish_node")
-             state["intent"] = "publish"
-             state["ai_reply"] = ""  # publish_node will fill this
              return state
-
-         # EDIT command
-         if "edit" in current_msg or "change" in current_msg or "update" in current_msg:
-             field = current_msg.replace("edit", "").replace("change", "").replace("update", "").strip()
-             logger.info(f"✏️ COMMAND: edit field='{field}'")
-             state["status"] = "collecting"
-             state["missing_fields"] = [field] if field else ["location"]
-             state["ai_reply"] = f"Sure! What would you like to change for **{field}**?"
-             return state
-
-         # DISCARD command
-         if any(w in current_msg for w in {"discard", "cancel", "delete", "remove", "no thanks"}):
-             logger.info("🗑️ COMMAND: discard – clearing draft")
-             state["status"] = None
-             state["draft_preview"] = None
-             state["intent"] = None
-             state["ai_reply"] = "Draft discarded. Let me know if you want to list another property!"
-             return state
-
-         # SEARCH command while draft is active
-         if any(w in current_msg for w in {"search", "find", "look for", "show me"}):
-             logger.info("🔍 COMMAND: search – switching to search mode")
-             state["intent"] = "search"
-             state["status"] = None
-             # keep draft_preview intact so preview stays visible
              return state
-
-         # Any other message while draft is active = casual chat
-         logger.info("💬 COMMAND: casual chat – keeping preview active")
-         state["ai_reply"] = "I'm here to help! You can say **publish** to list, **edit** to change something, or ask me anything else."
-         return state
-
-     # ===== End command detection =====
-
-     # ===== NORMAL FLOW: New listing or search intent =====
-     if state.get("status") in ["published", "error"]:
-         logger.info(f"⭐ Skipping intent_node, status={state.get('status')}")
-         return state
-
-     user_role = state["user_role"]
-     user_id = state.get("user_id")
-     human_msg = state["messages"][-1]["content"]
-
-     prompt = SYSTEM_PROMPT.replace("{user_role}", user_role)
-     messages = [
-         {"role": "system", "content": prompt},
-         {"role": "user", "content": human_msg},
-     ]
-
-     logger.info("🤖 Aida intent call with LLM router", user_role=user_role, msg=human_msg)
-
-     try:
-         # Use smart LLM routing instead of direct client call
-         text, model_used, usage = await call_llm_smart(
-             messages=messages,
-             intent="extraction",
-             max_tokens=MAX_TOKENS,
-             temperature=TEMP,
-         )
-         raw = text.strip()
-         logger.debug("🤖 LLM response received", model=model_used, usage=usage)
-     except Exception as e:
-         logger.error("❌ LLM call failed", exc_info=e)
-         data = {"allowed": False, "ai_reply": "Sorry, I couldn't process that. Please try again."}
-         state.update(allowed=False, ai_reply=data["ai_reply"])
-         return state
-
-     try:
-         cleaned = _clean_json(raw)
-         data = json.loads(cleaned)
-     except json.JSONDecodeError as e:
-         logger.error("❌ Aida bad json", raw=raw, exc_info=e)
-         data = {"allowed": False, "ai_reply": "Sorry, I didn't understand that. Could you rephrase?"}
-
-     # Handle LISTING INTENT with progressive collection
-     if data.get("intent") == "list":
-         data["allowed"] = True
-         missing = _get_missing_fields(data)
-
-         intro_and_example = """To list a property, here's how it's done:
-
- 📝 **Example:**
- "I have a 3-bedroom, 2-bathroom property in Cotonou for rent at 50,000 XOF per month. It has a balcony, kitchen, and dryer. It's fully furnished. Renters must pay a 3-month deposit."
-
- 📸 You can also upload property photos to make it more attractive!"""
-
-         should_show_example = not any([
-             data.get("bedrooms"), data.get("bathrooms"), data.get("price"),
-             data.get("amenities"), data.get("requirements")
-         ])
-
-         if missing:
-             data["status"] = "collecting"
-             data["missing_fields"] = missing
-             data["next_question"] = _get_next_question(missing)
-             data["ai_reply"] = intro_and_example if should_show_example else _get_next_question(missing)
-         else:
-             # All required fields complete – move to draft_ready
-             data["status"] = "draft_ready"
-             data["missing_fields"] = []
-             data["draft_preview"] = _build_draft_preview(data)
-             data["ai_reply"] = "Perfect! Let me prepare your listing draft..."
-             logger.info("✅ All required fields complete, moving to draft_ready")
-
-     # SEARCH is always allowed (role_gate_node will check)
-     if data.get("intent") == "search":
-         data["allowed"] = True
-
-     # Normalize values
-     location = _normalize_locations(data.get("location"))
-     amenities = _normalize_amenities(data.get("amenities", []))
-     price_type = _normalize_price_type(data.get("price_type"))
-     listing_type = _normalize_listing_type(data.get("listing_type"))
-
-     # SMART INFERENCE + ML VALIDATION with ERROR HANDLING
-     if data.get("intent") == "list":
-         location_input = data.get("location")
-         if location_input:
              try:
-                 city, location_info = await ml_extractor.extract_location_from_address(location_input)
-                 if city:
-                     data["location"] = city
-                     data["location_details"] = location_info
-                     logger.info(f"✅ Extracted city from address: {location_input} → {city}")
              except Exception as e:
-                 logger.warning(f"⚠️ Failed to extract location: {e}")
-
-         try:
-             listing_type, listing_confidence = ml_extractor.infer_listing_type(
-                 data, user_role=user_role, user_message=human_msg
-             )
-             if listing_type:
-                 data["listing_type"] = listing_type
-                 data["listing_confidence"] = listing_confidence
-                 logger.info(f"✅ Inferred listing_type: {listing_type} (confidence: {listing_confidence})")
-         except Exception as e:
-             logger.warning(f"⚠️ Failed to infer listing_type: {e}")
-
-         try:
-             currency, extracted_city, currency_confidence = await ml_extractor.infer_currency(data)
-             if currency:
-                 data["currency"] = currency
-                 data["currency_confidence"] = currency_confidence
-                 if extracted_city:
-                     data["location"] = extracted_city
-                 logger.info(f"✅ Inferred currency: {currency} (confidence: {currency_confidence})")
-         except Exception as e:
-             logger.warning(f"⚠️ Failed to infer currency: {e}")
-             data["currency"] = data.get("currency", "XOF")
-
          try:
-             validation_issues = []
-             validation_suggestions = []
-             field_validations = {}
-             for field in ["location", "bedrooms", "bathrooms", "price", "price_type"]:
-                 value = data.get(field)
-                 if value is not None:
-                     result = ml_extractor.validate_field(field, value, human_msg, user_id)
-                     field_validations[field] = result
-                     if not result["is_valid"]:
-                         validation_issues.append(f"❌ {field}: {result['suggestion']}")
-                         logger.warning(f"Validation failed for {field}", suggestion=result["suggestion"])
-                     elif result["suggestion"]:
-                         validation_suggestions.append(f"💡 {field}: {result['suggestion']}")
-             data["field_validations"] = field_validations
-             data["validation_suggestions"] = validation_suggestions
-             if validation_issues:
-                 current_reply = data.get("ai_reply", "")
-                 data["ai_reply"] = current_reply + "\n\n" + "\n".join(validation_issues)
-                 logger.info("⚠️ ML validation issues found", issues=validation_issues)
-         except Exception as e:
-             logger.warning(f"⚠️ Failed to validate fields: {e}")
-
-     intent_value = data.get("intent")
-
-     # Update state with all fields
-     state.update(
-         allowed=data.get("allowed", False),
-         status=data.get("status"),
-         missing_fields=data.get("missing_fields", []),
-         next_question=data.get("next_question"),
-
-         # Listing fields
-         listing_type=listing_type,
-         location=location,
-         bedrooms=data.get("bedrooms"),
-         bathrooms=data.get("bathrooms"),
-         price=data.get("price"),
-         price_type=price_type,
-         amenities=amenities,
-         requirements=data.get("requirements"),
-
-         # Search fields
-         min_price=data.get("min_price"),
-         max_price=data.get("max_price"),
-
-         # ML fields
-         field_validations=data.get("field_validations"),
-         listing_confidence=data.get("listing_confidence"),
-         currency_confidence=data.get("currency_confidence"),
-         location_details=data.get("location_details"),
-         validation_suggestions=data.get("validation_suggestions"),
-
-         # Other
-         currency=data.get("currency", "XOF"),
-         ai_reply=data.get("ai_reply", ""),
-         draft_preview=data.get("draft_preview"),
-     )
-
-     logger.info("👀 Intent node processed",
-                 intent=intent_value,
-                 status=state.get("status"),
-                 missing_fields=state.get("missing_fields"),
-                 location=state.get("location"),
-                 amenities=state.get("amenities"))
-     return state
+ # app/ai/nodes/intent_node.py – FINAL: Multi-LLM routing + context mgmt + observability
  import json
  import re
  from typing import Dict, List
+ import logging
+
  from tenacity import retry, stop_after_attempt, wait_exponential
  from app.core.llm_router import call_llm_smart
  from app.core.context_manager import get_context_manager
+ from app.core.error_handling import (
+     async_retry,
+     RetryStrategy,
+     trace_operation,
+     LLMError,
+ )
+ from app.core.observability import get_token_tracker
  from app.ml.models.ml_listing_extractor import get_ml_extractor
  from app.ai.nodes.draft_node import (
      _generate_title,
  ...
      _add_amenity_icons,
  )

+ logger = logging.getLogger(__name__)

  MAX_TOKENS = 600
  TEMP = 0

  ml_extractor = get_ml_extractor()

+ # ============================================================
+ # Helpers
+ # ============================================================
+
  def _load_system() -> str:
+     """Load system prompt from file."""
+     try:
+         with open("app/ai/prompts/system_prompt.txt", encoding="utf-8") as f:
+             return f.read()
+     except FileNotFoundError:
+         logger.error("❌ System prompt file not found")
+         return "You are Aida, a helpful AI assistant."

  SYSTEM_PROMPT = _load_system()

  def _clean_json(raw: str) -> str:
+     """Clean JSON response by removing markdown artifacts."""
      cleaned = re.sub(r'```json\s*', '', raw)
      cleaned = re.sub(r'```\s*', '', cleaned)
      return cleaned.strip()
  ...
      return full.strip()

  def _normalize_locations(location: str) -> str:
+     """Normalize location names."""
      if not location:
          return None
      loc_lower = location.lower().strip()
      location_map = {
+         "lago": "lagos", "lgs": "lagos",
+         "cotnu": "cotonou", "cotonus": "cotonou",
+         "akpakpa": "akpakpa", "nairobi": "nairobi",
+         "accra": "accra", "joburg": "johannesburg",
      }
      return location_map.get(loc_lower, location.lower())

  def _normalize_amenities(amenities: list) -> list:
+     """Normalize amenity names."""
      if not amenities:
          return []
      amenity_map = {
+         "balcno": "balcony", "parkng": "parking",
+         "furnisd": "furnished", "ac": "air conditioning",
      }
      normalized = []
      for amenity in amenities:
  ...
      return normalized

  def _normalize_price_type(price_type: str) -> str:
+     """Normalize price type."""
      if not price_type:
          return None
      pt_lower = price_type.lower().strip()
      price_type_map = {
+         "montly": "monthly", "mth": "monthly",
+         "nightl": "nightly", "weakly": "weekly",
      }
      return price_type_map.get(pt_lower, pt_lower)

  def _normalize_listing_type(listing_type: str) -> str:
+     """Normalize listing type."""
      if not listing_type:
          return None
      lt_lower = listing_type.lower().strip()
      listing_type_map = {
+         "for rent": "rent", "short stay": "short-stay",
+         "for sale": "sale", "roommate": "roommate",
      }
      return listing_type_map.get(lt_lower, lt_lower)

  def _get_missing_fields(data: Dict) -> List[str]:
+     """Get missing required fields for listing."""
      if data.get("intent") != "list":
          return []
      required = ["location", "bedrooms", "bathrooms", "price", "listing_type", "price_type"]
  ...
      return missing

  def _get_next_question(missing_fields: List[str]) -> str:
+     """Get next question for missing field."""
      if not missing_fields:
          return None
      next_field = missing_fields[0]
  ...
      return questions.get(next_field, "What else should I know?")

  def _build_draft_preview(data: dict) -> dict:
+     """Build draft preview object."""
+     title = _generate_title(data)
      description = _generate_description(data)
+     icons = _add_amenity_icons(data.get("amenities", []))
+     images = data.get("draft", {}).get("images", []) if isinstance(data.get("draft"), dict) else []

      return {
+         "title": title,
+         "description": description,
+         "location": data.get("location", "").title(),
+         "bedrooms": data.get("bedrooms"),
+         "bathrooms": data.get("bathrooms"),
+         "price": data.get("price"),
+         "price_type": data.get("price_type"),
+         "listing_type": data.get("listing_type"),
+         "amenities": data.get("amenities", []),
+         "amenities_with_icons": icons,
+         "requirements": data.get("requirements"),
+         "currency": data.get("currency", "XOF"),
+         "images": images,
+         "field_confidences": data.get("field_validations", {}),
      }

+ # ============================================================
+ # Intent Node
+ # ============================================================
+
+ @async_retry(strategy=RetryStrategy.MODERATE, operation_name="intent_node")
  async def intent_node(state: Dict) -> Dict:
      """
+     LangGraph node: Extract and route user intent
+
+     Features:
+     - Command detection when preview active
+     - Smart LLM routing with auto-fallback
+     - Context window management
+     - ML validation and inference
+     - Full error handling and observability
      """
+
      current_msg = _get_current_message(state).lower()
      status = state.get("status")

+     with trace_operation(
+         "intent_node",
+         {
+             "status": status,
+             "has_draft": state.get("draft_preview") is not None,
+         }
+     ):
+         # ===== CRITICAL: Handle commands FIRST when preview is active =====
+         if status in {"preview_shown", "waiting_for_images"} and state.get("draft_preview"):
+             logger.info(f"🎯 COMMAND DETECTION MODE: status={status}")
+
+             # PUBLISH command
+             if any(w in current_msg for w in {"publish", "go live", "confirm", "yes", "ok"}):
+                 logger.info("📤 COMMAND: publish")
+                 state["intent"] = "publish"
+                 state["ai_reply"] = ""
+                 return state
+
+             # EDIT command
+             if "edit" in current_msg or "change" in current_msg or "update" in current_msg:
+                 field = current_msg.replace("edit", "").replace("change", "").replace("update", "").strip()
+                 logger.info(f"✏️ COMMAND: edit field='{field}'")
+                 state["status"] = "collecting"
+                 state["missing_fields"] = [field] if field else ["location"]
+                 state["ai_reply"] = f"Sure! What would you like to change for **{field}**?"
+                 return state
+
+             # DISCARD command
+             if any(w in current_msg for w in {"discard", "cancel", "delete", "no"}):
+                 logger.info("🗑️ COMMAND: discard")
+                 state["status"] = None
+                 state["draft_preview"] = None
+                 state["intent"] = None
+                 state["ai_reply"] = "Draft discarded. Let me know if you want to list another property!"
+                 return state
+
+             # CASUAL CHAT
+             logger.info("💬 COMMAND: casual chat")
+             state["ai_reply"] = "Say **publish** to list, **edit** to change, or **discard** to start over."
              return state
+
+         # ===== End command detection =====
+
+         if state.get("status") in ["published", "error"]:
+             logger.info(f"⭐ Skipping intent_node, status={state.get('status')}")
              return state
+
+         user_role = state["user_role"]
+         user_id = state.get("user_id")
+         human_msg = state["messages"][-1]["content"]
+
+         # ===== LLM CALL WITH SMART ROUTING =====
+         with trace_operation("llm_call_with_routing"):
              try:
+                 # Manage context
+                 context_mgr = get_context_manager()
+                 messages = await context_mgr.manage_context([
+                     {"role": "system", "content": SYSTEM_PROMPT.replace("{user_role}", user_role)},
+                     {"role": "user", "content": human_msg},
+                 ])
+
+                 logger.info(f"🤖 Calling LLM with smart routing")
+
+                 # Call LLM with smart routing
+                 text, model_used, usage = await call_llm_smart(
+                     messages,
+                     intent=state.get("intent"),
+                     temperature=TEMP,
+                     max_tokens=MAX_TOKENS,
+                 )
+
+                 # Track tokens
+                 tracker = get_token_tracker()
+                 tracker.record_tokens(
+                     model_used,
+                     usage.get("prompt_tokens", 0),
+                     usage.get("completion_tokens", 0),
+                 )
+
+                 logger.info(
+                     f"✅ LLM response from {model_used}",
+                     extra={
+                         "tokens": usage.get("total_tokens", 0),
+                         "duration_ms": usage.get("duration_ms", 0),
+                     }
+                 )
+
+                 raw = text
+
+             except LLMError as e:
+                 logger.error(f"❌ LLM error: {e.message}")
+                 state["ai_reply"] = "Sorry, I'm having trouble. Please try again."
+                 state["status"] = "error"
+                 return state
+
              except Exception as e:
+                 logger.error(f"❌ Unexpected LLM error: {e}", exc_info=True)
+                 raise
+
+         # ===== Parse JSON response =====
          try:
+             cleaned = _clean_json(raw)
+             data = json.loads(cleaned)
+         except json.JSONDecodeError as e:
+             logger.error(f"❌ Invalid JSON response: {raw[:100]}")
+             data = {"allowed": False, "ai_reply": "Sorry, I didn't understand that. Could you rephrase?"}
+
+         # ===== Handle LISTING INTENT =====
+         if data.get("intent") == "list":
+             data["allowed"] = True
+             missing = _get_missing_fields(data)
+
+             if missing:
+                 data["status"] = "collecting"
+                 data["missing_fields"] = missing
+                 data["next_question"] = _get_next_question(missing)
+             else:
+                 data["status"] = "draft_ready"
+                 data["missing_fields"] = []
+                 data["draft_preview"] = _build_draft_preview(data)
+                 data["ai_reply"] = "Perfect! Let me prepare your listing draft..."
+                 logger.info("✅ All required fields complete")
+
+         # ===== Handle SEARCH INTENT =====
+         if data.get("intent") == "search":
+             data["allowed"] = True
+
+         # ===== Normalize values =====
+         location = _normalize_locations(data.get("location"))
+         amenities = _normalize_amenities(data.get("amenities", []))
+         price_type = _normalize_price_type(data.get("price_type"))
+         listing_type = _normalize_listing_type(data.get("listing_type"))
+
+         # ===== ML INFERENCE & VALIDATION =====
+         if data.get("intent") == "list":
+             with trace_operation("ml_processing"):
+                 # Extract location
+                 if data.get("location"):
+                     try:
+                         city, loc_info = await ml_extractor.extract_location_from_address(data["location"])
+                         if city:
+                             data["location"] = city
+                             data["location_details"] = loc_info
+                             logger.info(f"✅ Location extracted: {data['location']}")
+                     except Exception as e:
+                         logger.warning(f"⚠️ Location extraction failed: {e}")
+
+                 # Infer listing type
+                 try:
+                     lt, conf = ml_extractor.infer_listing_type(
+                         data, user_role=user_role, user_message=human_msg
+                     )
+                     if lt:
+                         data["listing_type"] = lt
+                         data["listing_confidence"] = conf
+                         logger.info(f"✅ Listing type inferred: {lt}")
+                 except Exception as e:
+                     logger.warning(f"⚠️ Listing type inference failed: {e}")
+
+                 # Infer currency
+                 try:
+                     currency, city, conf = await ml_extractor.infer_currency(data)
+                     if currency:
+                         data["currency"] = currency
+                         data["currency_confidence"] = conf
+                         logger.info(f"✅ Currency inferred: {currency}")
+                 except Exception as e:
+                     logger.warning(f"⚠️ Currency inference failed: {e}")
+                     data["currency"] = data.get("currency", "XOF")
+
+         # ===== Update state =====
+         state.update(
+             allowed=data.get("allowed", False),
+             status=data.get("status"),
+             missing_fields=data.get("missing_fields", []),
+             next_question=data.get("next_question"),
+             listing_type=listing_type,
+             location=location,
+             bedrooms=data.get("bedrooms"),
+             bathrooms=data.get("bathrooms"),
+             price=data.get("price"),
+             price_type=price_type,
+             amenities=amenities,
+             requirements=data.get("requirements"),
+             min_price=data.get("min_price"),
+             max_price=data.get("max_price"),
+             currency=data.get("currency", "XOF"),
+             ai_reply=data.get("ai_reply", ""),
+             draft_preview=data.get("draft_preview"),
+         )
+
+         logger.info(
+             f"👀 Intent node processed",
+             extra={
+                 "intent": data.get("intent"),
+                 "status": state.get("status"),
+             }
+         )
+
+         return state
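Note: because the publish/edit/discard branch runs before any LLM call, commands issued while a preview is active cost no tokens. A rough usage sketch of that path (assuming the state keys read in the diff are sufficient and that _get_current_message only needs the last message):

import asyncio
from app.ai.nodes.intent_node import intent_node

async def demo():
    # Simulate the user typing "ok, publish it" while the preview is on screen
    state = {
        "status": "preview_shown",
        "draft_preview": {"title": "2-bed apartment in Cotonou"},
        "messages": [{"role": "user", "content": "ok, publish it"}],
        "user_role": "landlord",
    }
    state = await intent_node(state)
    assert state["intent"] == "publish"  # matched by substring, no LLM call

asyncio.run(demo())

One sharp edge worth noting: the bare substring sets are permissive ("no" also matches "now" or "know", "ok" matches "broken"), so word-boundary matching such as re.search(r"\bno\b", current_msg) would be a safer discard trigger.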
app/ai/nodes/search_node.py CHANGED
@@ -1,20 +1,27 @@
- # app/ai/nodes/search_node.py - Return UI-ready search results with error handling
- import json
  import httpx
  from typing import Dict, List
  from qdrant_client import AsyncQdrantClient, models
- from app.config import settings
- from structlog import get_logger
  from tenacity import retry, stop_after_attempt, wait_exponential

- logger = get_logger(__name__)

  EMBED_MODEL = "qwen/qwen3-embedding-8b"
  TOP_K = 6

- # ------------------------------------------------------------------
- # Qdrant client
- # ------------------------------------------------------------------
  qdrant_client = AsyncQdrantClient(
      url=settings.QDRANT_URL,
      api_key=settings.QDRANT_API_KEY,
@@ -22,20 +29,26 @@ qdrant_client = AsyncQdrantClient(
      timeout=60,
  )

- # ---------- error handling ----------
  class SearchError(Exception):
      """Base exception for search operations."""
      pass

- class VectorDBError(SearchError):
-     """Qdrant/Vector DB error."""
-     pass
-
  class EmbeddingError(SearchError):
      """Embedding generation error."""
      pass

- # ---------- helpers ----------
  def _build_filter(state: Dict) -> models.Filter:
      """Build comprehensive Qdrant filter from ALL search fields."""
      must = []
@@ -50,7 +63,7 @@ def _build_filter(state: Dict) -> models.Filter:
              )
          )

-     # Price range filters (combine into single condition)
      if state.get("min_price") is not None or state.get("max_price") is not None:
          price_range = {}
          if state.get("min_price") is not None:
@@ -84,7 +97,7 @@ def _build_filter(state: Dict) -> models.Filter:
              )
          )

-     # Price type filter (monthly, nightly, yearly, etc.)
      price_type = (state.get("price_type") or "").lower()
      if price_type:
          must.append(
@@ -94,7 +107,7 @@ def _build_filter(state: Dict) -> models.Filter:
              )
          )

-     # Listing type filter (rent, short_stay, roommate, sale)
      listing_type = (state.get("listing_type") or "").lower()
      if listing_type:
          must.append(
@@ -104,7 +117,7 @@ def _build_filter(state: Dict) -> models.Filter:
              )
          )

-     # Amenities filter (all mentioned amenities must exist in listing)
      amenities = state.get("amenities", [])
      if amenities:
          for amenity in amenities:
@@ -118,67 +131,94 @@ def _build_filter(state: Dict) -> models.Filter:
              )
          )

      filt = models.Filter(must=must) if must else models.Filter()
-     logger.info("🔍 Filter built", must_conditions=len(must), location=loc,
-                 min_price=state.get("min_price"), max_price=state.get("max_price"),
-                 bedrooms=state.get("bedrooms"), bathrooms=state.get("bathrooms"),
-                 amenities=amenities, price_type=price_type, listing_type=listing_type)
      return filt

-
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=5))
  async def _embed(text: str) -> List[float]:
-     """Call OpenRouter embedding endpoint with retry logic."""
-     if not text or not text.strip():
-         raise EmbeddingError("Empty text provided for embedding")

-     payload = {
-         "model": EMBED_MODEL,
-         "input": text,
-         "encoding_format": "float",
-     }
-     headers = {
-         "Authorization": f"Bearer {settings.OPENROUTER_API_KEY}",
-         "Content-Type": "application/json",
-         "HTTP-Referer": "",
-         "X-Title": "",
-     }

-     try:
-         async with httpx.AsyncClient(timeout=60) as client:
-             resp = await client.post(
-                 "https://openrouter.ai/api/v1/embeddings",
-                 headers=headers,
-                 json=payload,
-             )
-             resp.raise_for_status()
-             data = resp.json()
-             if not data.get("data"):
-                 raise EmbeddingError("Empty embedding response from API")
-             return data["data"][0]["embedding"]
-     except httpx.HTTPError as e:
-         logger.error("❌ Embedding API HTTP error", exc_info=e)
-         raise EmbeddingError(f"Failed to get embedding: {e}")
-     except KeyError as e:
-         logger.error("❌ Embedding API response malformed", exc_info=e)
-         raise EmbeddingError(f"Malformed embedding response: {e}")
-

- # ---------- suggestion helpers ----------
- @retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=1, min=1, max=3))
  async def _search_with_must(must: List, vector: List[float]):
      """Execute Qdrant search with retry logic."""
-     try:
-         return await qdrant_client.search(
-             collection_name="listings",
-             query_vector=vector,
-             query_filter=models.Filter(must=must),
-             limit=TOP_K,
-             with_payload=True,
-         )
-     except Exception as e:
-         logger.error("❌ Qdrant search failed", exc_info=e)
-         raise VectorDBError(f"Search failed: {e}")
-

  def _add_price_range(must: List, state: Dict):
      """Add combined price range filter."""
@@ -190,199 +230,225 @@ def _add_price_range(must: List, state: Dict):
          price_range["lte"] = state["max_price"]

      if price_range:
-         must.append(models.FieldCondition(key="price", range=models.Range(**price_range)))
-

- def _hits_to_cards(hits):
      """Convert Qdrant hits to UI cards."""
-     return [
-         {
-             "id": hit.id,
-             "title": hit.payload.get("title") or f"{hit.payload.get('bedrooms', '')}-bed {hit.payload.get('location', '')}",
-             "location": hit.payload.get("location"),
-             "price": hit.payload.get("price"),
-             "price_type": hit.payload.get("price_type"),
-             "bedrooms": hit.payload.get("bedrooms"),
-             "bathrooms": hit.payload.get("bathrooms"),
-             "amenities": hit.payload.get("amenities", []),
-             "description": hit.payload.get("description"),
-             "listing_type": hit.payload.get("listing_type"),
-             "images": hit.payload.get("images", []),  # ✅ Include images for UI
-             "currency": hit.payload.get("currency", "XOF"),
-         }
-         for hit in hits
-     ]
-

  async def _suggest_relaxed(state: Dict, vector: List[float]) -> List[dict]:
      """
      Loosen constraints progressively while keeping location strict.
-     If no location specified, return empty.
      """
-     loc = (state.get("location") or "").lower()
-     br = state.get("bedrooms")
-     amenities = state.get("amenities", [])
-
-     # If no location specified, return empty
-     if not loc:
-         return []

-     # Location is ALWAYS a hard constraint
-     location_filter = models.FieldCondition(
-         key="location_lower",
-         match=models.MatchValue(value=loc)
-     )

-     # 1. Try with all filters intact
-     try:
-         must = [location_filter]
-         if br is not None:
-             must.append(models.FieldCondition(key="bedrooms", match=models.MatchValue(value=br)))
-         _add_price_range(must, state)
-         for amenity in amenities:
-             must.append(models.FieldCondition(key="amenities", match=models.MatchValue(value=amenity.lower())))
-         hits = await _search_with_must(must, vector)
-         if hits:
-             return _hits_to_cards(hits)
-     except VectorDBError as e:
-         logger.warning(f"⚠️ Search with all filters failed: {e}")

-     # 2. Loosen amenities (remove optional ones)
-     try:
-         must = [location_filter]
-         if br is not None:
-             must.append(models.FieldCondition(key="bedrooms", match=models.MatchValue(value=br)))
-         _add_price_range(must, state)
-         hits = await _search_with_must(must, vector)
-         if hits:
-             return _hits_to_cards(hits)
-     except VectorDBError as e:
-         logger.warning(f"⚠️ Search without amenities failed: {e}")

-     # 3. Loosen bedrooms ±1, keep location strict
-     if br is not None:
          try:
              must = [location_filter]
-             new_br = br - 1 if br > 1 else br + 1
-             must.append(models.FieldCondition(key="bedrooms", match=models.MatchValue(value=new_br)))
              _add_price_range(must, state)
              hits = await _search_with_must(must, vector)
              if hits:
                  return _hits_to_cards(hits)
-         except VectorDBError as e:
-             logger.warning(f"⚠️ Search with adjusted bedrooms failed: {e}")

-     # 4. Loosen price +25%, keep location strict
-     try:
-         must = [location_filter]
          if br is not None:
-             must.append(models.FieldCondition(key="bedrooms", match=models.MatchValue(value=br)))
-         if state.get("max_price") is not None:
-             relaxed_max = int(state["max_price"] * 1.25)
-             must.append(models.FieldCondition(key="price", range=models.Range(lte=relaxed_max)))
-         else:
-             _add_price_range(must, state)
-         hits = await _search_with_must(must, vector)
-         if hits:
-             return _hits_to_cards(hits)
-     except VectorDBError as e:
-         logger.warning(f"⚠️ Search with relaxed price failed: {e}")

-     return []


- # ---------- node ----------
  async def search_node(state: Dict) -> Dict:
      """
-     LangGraph node: comprehensive search with all filters
-     - title/description (semantic via embedding)
-     - location, price range, bedrooms, bathrooms, amenities (keyword filters)
-     - price_type, listing_type

-     ✅ RETURNS: search_results + search_preview (UI-ready)
-     ✅ ERROR HANDLING: Graceful fallback on Qdrant/embedding failures
      """
-     query = state.get("search_query", "") or state["messages"][-1]["content"]

-     # Get embedding with error handling
-     try:
-         vector = await _embed(query)
-         logger.info("✅ Embedding generated successfully")
-     except EmbeddingError as e:
-         logger.error("❌ Failed to generate embedding", exc_info=e)
-         state["ai_reply"] = "Sorry, I couldn't process your search right now. Please try again."
-         state["search_preview"] = {
-             "type": "search_results",
-             "count": 0,
-             "query": query,
-             "filters": {},
-             "results": [],
-             "message": "Search temporarily unavailable"
-         }
-         state["search_results"] = []
-         return state
-
-     # Build filter
-     filt = _build_filter(state)
-
-     logger.info("🔍 Searching Qdrant", query=query, filter=str(filt))
-
-     # Execute search with error handling
-     try:
-         hits = await _search_with_must(filt.must if filt.must else [], vector)
-         logger.info("📍 Qdrant search result", hits_count=len(hits))
-     except VectorDBError as e:
-         logger.error("❌ Qdrant search failed", exc_info=e)
-         state["ai_reply"] = "Sorry, I'm having trouble searching right now. Please try again."
-         state["search_preview"] = {
-             "type": "search_results",
-             "count": 0,
-             "query": query,
-             "filters": {
-                 "location": state.get("location"),
-                 "min_price": state.get("min_price"),
-                 "max_price": state.get("max_price"),
-                 "bedrooms": state.get("bedrooms"),
-                 "bathrooms": state.get("bathrooms"),
-                 "price_type": state.get("price_type"),
-                 "listing_type": state.get("listing_type"),
-                 "amenities": state.get("amenities", []),
-             },
-             "results": [],
-             "message": "Search service temporarily unavailable"
          }
-         state["search_results"] = []
-         return state
-
-     cards = _hits_to_cards(hits)
-
-     # --- personalize zero-hit reply + suggestions + UI preview
-     if not cards:
-         location = state.get("location") or "that area"
-         bedrooms = state.get("bedrooms")
-         price_bit = (
-             " in your price range"
-             if state.get("min_price") is not None or state.get("max_price") is not None
-             else ""
-         )
-         br_bit = f" with {bedrooms} bedrooms" if bedrooms else ""
-         amenities_bit = f" with {', '.join(state.get('amenities', []))}" if state.get("amenities") else ""
-
          try:
-             suggestions = await _suggest_relaxed(state, vector)
-             logger.info("💡 Suggestions generated", count=len(suggestions))
-         except Exception as e:
-             logger.warning(f"⚠️ Failed to generate suggestions: {e}")
-             suggestions = []
-
-         if suggestions:
-             state["ai_reply"] = (
-                 f"I found no exact match for your request, "
-                 f"but you might like these similar options:"
-             )
-             state["search_preview"] = {  # ✅ NEW: UI-ready preview for suggestions
                  "type": "search_results",
-                 "count": len(suggestions),
                  "query": query,
                  "filters": {
                      "location": state.get("location"),
@@ -390,22 +456,77 @@ async def search_node(state: Dict) -> Dict:
390
  "max_price": state.get("max_price"),
391
  "bedrooms": state.get("bedrooms"),
392
  "bathrooms": state.get("bathrooms"),
393
- "price_type": state.get("price_type"),
394
- "listing_type": state.get("listing_type"),
395
  "amenities": state.get("amenities", []),
396
  },
397
- "results": suggestions,
398
- "message": "Similar options available"
 
399
  }
400
- state["search_results"] = suggestions
401
  else:
402
- state["ai_reply"] = (
403
- f"I found no property in {location}{price_bit}{br_bit}{amenities_bit}. "
404
- "Try widening your search or check back later!"
405
- )
406
- state["search_preview"] = { # βœ… Empty UI state
407
  "type": "search_results",
408
- "count": 0,
409
  "query": query,
410
  "filters": {
411
  "location": state.get("location"),
@@ -417,30 +538,10 @@ async def search_node(state: Dict) -> Dict:
417
  "listing_type": state.get("listing_type"),
418
  "amenities": state.get("amenities", []),
419
  },
420
- "results": [],
421
- "message": "No results found"
422
  }
423
- else:
424
- state["ai_reply"] = f"Here are {len(cards)} places I found for you:"
425
- # βœ… NEW: Return UI-ready search preview
426
- state["search_preview"] = {
427
- "type": "search_results",
428
- "count": len(cards),
429
- "query": query,
430
- "filters": {
431
- "location": state.get("location"),
432
- "min_price": state.get("min_price"),
433
- "max_price": state.get("max_price"),
434
- "bedrooms": state.get("bedrooms"),
435
- "bathrooms": state.get("bathrooms"),
436
- "price_type": state.get("price_type"),
437
- "listing_type": state.get("listing_type"),
438
- "amenities": state.get("amenities", []),
439
- },
440
- "results": cards,
441
- "message": f"Found {len(cards)} listings"
442
- }
443
- state["search_results"] = cards
444
-
445
- logger.info("βœ… Search node finished", query=query, count=len(cards))
446
- return state
 
1
+ # app/ai/nodes/search_node.py - FINAL: Complete error handling + retry logic + observability
2
+ import logging
3
  import httpx
4
  from typing import Dict, List
5
  from qdrant_client import AsyncQdrantClient, models
 
 
6
  from tenacity import retry, stop_after_attempt, wait_exponential
7
 
8
+ from app.config import settings
9
+ from app.core.error_handling import (
10
+ async_retry,
11
+ RetryStrategy,
12
+ trace_operation,
13
+ VectorDBError,
14
+ )
15
+
16
+ logger = logging.getLogger(__name__)
17
 
18
  EMBED_MODEL = "qwen/qwen3-embedding-8b"
19
  TOP_K = 6
20
 
21
+ # ============================================================
22
+ # Qdrant Client
23
+ # ============================================================
24
+
25
  qdrant_client = AsyncQdrantClient(
26
  url=settings.QDRANT_URL,
27
  api_key=settings.QDRANT_API_KEY,
 
29
  timeout=60,
30
  )
31
 
32
+ # ============================================================
33
+ # Custom Exceptions
34
+ # ============================================================
35
+
36
  class SearchError(Exception):
37
  """Base exception for search operations."""
38
  pass
39
 
 
 
 
 
40
  class EmbeddingError(SearchError):
41
  """Embedding generation error."""
42
  pass
43
 
44
+ class QdrantSearchError(SearchError):
45
+ """Qdrant search error."""
46
+ pass
47
+
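# --- editor's sketch (illustrative, not part of this commit) ---
# `async_retry` and `RetryStrategy` are imported from app.core.error_handling,
# which does not appear in this diff. Assuming the strategy encodes
# (max attempts, base delay) for exponential backoff, a minimal stand-in
# compatible with the decorators used below could look like this:
import asyncio
import functools
from enum import Enum

class RetryStrategy(Enum):
    MODERATE = (3, 0.5)  # assumption: 3 attempts, 0.5 s base backoff

def async_retry(strategy: RetryStrategy, operation_name: str):
    attempts, base_delay = strategy.value
    def decorator(fn):
        @functools.wraps(fn)
        async def wrapper(*args, **kwargs):
            for attempt in range(1, attempts + 1):
                try:
                    return await fn(*args, **kwargs)
                except Exception:
                    if attempt == attempts:
                        raise  # out of attempts: surface the error
                    await asyncio.sleep(base_delay * 2 ** (attempt - 1))
        return wrapper
    return decorator
# --- end sketch ---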
48
+ # ============================================================
49
+ # Helpers
50
+ # ============================================================
51
+
52
  def _build_filter(state: Dict) -> models.Filter:
53
  """Build comprehensive Qdrant filter from ALL search fields."""
54
  must = []
 
63
  )
64
  )
65
 
66
+ # Price range filters
67
  if state.get("min_price") is not None or state.get("max_price") is not None:
68
  price_range = {}
69
  if state.get("min_price") is not None:
 
97
  )
98
  )
99
 
100
+ # Price type filter
101
  price_type = (state.get("price_type") or "").lower()
102
  if price_type:
103
  must.append(
 
107
  )
108
  )
109
 
110
+ # Listing type filter
111
  listing_type = (state.get("listing_type") or "").lower()
112
  if listing_type:
113
  must.append(
 
117
  )
118
  )
119
 
120
+ # Amenities filter
121
  amenities = state.get("amenities", [])
122
  if amenities:
123
  for amenity in amenities:
 
131
  )
132
 
133
  filt = models.Filter(must=must) if must else models.Filter()
134
+ logger.info(
135
+ "πŸ” Filter built",
136
+ extra={
137
+ "conditions": len(must),
138
+ "location": loc,
139
+ "price_range": f"{state.get('min_price')}-{state.get('max_price')}",
140
+ "bedrooms": state.get("bedrooms"),
141
+ "amenities": len(amenities),
142
+ }
143
+ )
144
  return filt
145
 
146
+ @async_retry(strategy=RetryStrategy.MODERATE, operation_name="embedding_generation")
 
147
  async def _embed(text: str) -> List[float]:
148
+ """
149
+ Generate embedding with retry logic and error handling.
 
150
 
151
+ Raises:
152
+ EmbeddingError: If embedding generation fails
153
+ """
 
 
 
 
 
 
 
 
154
 
155
+ with trace_operation("embedding_generation", {"text_length": len(text)}):
156
+ if not text or not text.strip():
157
+ logger.error("❌ Empty text provided for embedding")
158
+ raise EmbeddingError("Empty text provided for embedding")
159
+
160
+ payload = {
161
+ "model": EMBED_MODEL,
162
+ "input": text,
163
+ "encoding_format": "float",
164
+ }
165
+ headers = {
166
+ "Authorization": f"Bearer {settings.OPENROUTER_API_KEY}",
167
+ "Content-Type": "application/json",
168
+ "HTTP-Referer": "",
169
+ "X-Title": "",
170
+ }
171
+
172
+ try:
173
+ async with httpx.AsyncClient(timeout=60) as client:
174
+ logger.info("πŸ€– Calling embedding API")
175
+ resp = await client.post(
176
+ "https://openrouter.ai/api/v1/embeddings",
177
+ headers=headers,
178
+ json=payload,
179
+ timeout=60,
180
+ )
181
+ resp.raise_for_status()
182
+
183
+ data = resp.json()
184
+ if not data.get("data"):
185
+ logger.error("❌ Empty embedding response")
186
+ raise EmbeddingError("Empty embedding response from API")
187
+
188
+ embedding = data["data"][0]["embedding"]
189
+ logger.info(f"βœ… Embedding generated, dimension={len(embedding)}")
190
+ return embedding
191
+
192
+ except httpx.HTTPError as e:
193
+ logger.error(f"❌ Embedding API HTTP error: {e}", exc_info=True)
194
+ raise EmbeddingError(f"HTTP error calling embedding API: {e}")
195
+ except KeyError as e:
196
+ logger.error(f"❌ Embedding response malformed: {e}", exc_info=True)
197
+ raise EmbeddingError(f"Malformed embedding response: {e}")
198
+ except Exception as e:
199
+ logger.error(f"❌ Unexpected embedding error: {e}", exc_info=True)
200
+ raise EmbeddingError(f"Unexpected error generating embedding: {e}")
201
 
202
+ @async_retry(strategy=RetryStrategy.MODERATE, operation_name="qdrant_search")
 
203
  async def _search_with_must(must: List, vector: List[float]):
204
  """Execute Qdrant search with retry logic."""
205
+
206
+ with trace_operation("qdrant_search_execution", {"filter_count": len(must)}):
207
+ try:
208
+ logger.info("πŸ”Ž Executing Qdrant search")
209
+ hits = await qdrant_client.search(
210
+ collection_name="listings",
211
+ query_vector=vector,
212
+ query_filter=models.Filter(must=must),
213
+ limit=TOP_K,
214
+ with_payload=True,
215
+ )
216
+ logger.info(f"βœ… Qdrant search returned {len(hits)} results")
217
+ return hits
218
+
219
+ except Exception as e:
220
+ logger.error(f"❌ Qdrant search failed: {e}", exc_info=True)
221
+ raise QdrantSearchError(f"Qdrant search failed: {e}")
222
 
223
  def _add_price_range(must: List, state: Dict):
224
  """Add combined price range filter."""
 
230
  price_range["lte"] = state["max_price"]
231
 
232
  if price_range:
233
+ must.append(
234
+ models.FieldCondition(
235
+ key="price",
236
+ range=models.Range(**price_range)
237
+ )
238
+ )
239
 
240
+ def _hits_to_cards(hits) -> List[dict]:
241
  """Convert Qdrant hits to UI cards."""
242
+ cards = []
243
+ for hit in hits:
244
+ try:
245
+ card = {
246
+ "id": hit.id,
247
+ "title": (
248
+ hit.payload.get("title") or
249
+ f"{hit.payload.get('bedrooms', '')}-bed {hit.payload.get('location', '')}"
250
+ ),
251
+ "location": hit.payload.get("location"),
252
+ "price": hit.payload.get("price"),
253
+ "price_type": hit.payload.get("price_type"),
254
+ "bedrooms": hit.payload.get("bedrooms"),
255
+ "bathrooms": hit.payload.get("bathrooms"),
256
+ "amenities": hit.payload.get("amenities", []),
257
+ "description": hit.payload.get("description"),
258
+ "listing_type": hit.payload.get("listing_type"),
259
+ "images": hit.payload.get("images", []),
260
+ "currency": hit.payload.get("currency", "XOF"),
261
+ }
262
+ cards.append(card)
263
+ except Exception as e:
264
+ logger.warning(f"⚠️ Failed to convert hit to card: {e}")
265
+ continue
266
+
267
+ return cards
268
 
269
  async def _suggest_relaxed(state: Dict, vector: List[float]) -> List[dict]:
270
  """
271
  Loosen constraints progressively while keeping location strict.
272
+ Returns relaxed search results if exact match not found.
273
  """
274
+
275
+ with trace_operation("suggest_relaxed", {"location": state.get("location")}):
276
+ loc = (state.get("location") or "").lower()
277
+ br = state.get("bedrooms")
278
+ amenities = state.get("amenities", [])
 
 
279
 
280
+ # If no location specified, return empty
281
+ if not loc:
282
+ logger.warning("⚠️ No location specified for suggestion")
283
+ return []
 
284
 
285
+ # Location is ALWAYS a hard constraint
286
+ location_filter = models.FieldCondition(
287
+ key="location_lower",
288
+ match=models.MatchValue(value=loc)
289
+ )
 
 
 
 
 
 
 
 
290
 
291
+ # Try progressively looser constraints
292
+
293
+ # 1. All filters
294
+ try:
295
+ logger.info("πŸ”„ Trying search with all filters")
296
+ must = [location_filter]
297
+ if br is not None:
298
+ must.append(
299
+ models.FieldCondition(
300
+ key="bedrooms",
301
+ match=models.MatchValue(value=br)
302
+ )
303
+ )
304
+ _add_price_range(must, state)
305
+ for amenity in amenities:
306
+ must.append(
307
+ models.FieldCondition(
308
+ key="amenities",
309
+ match=models.MatchValue(value=amenity.lower())
310
+ )
311
+ )
312
+ hits = await _search_with_must(must, vector)
313
+ if hits:
314
+ logger.info("βœ… Found results with all filters")
315
+ return _hits_to_cards(hits)
316
+ except Exception as e:
317
+ logger.warning(f"⚠️ Search with all filters failed: {e}")
318
 
319
+ # 2. No amenities
 
320
  try:
321
+ logger.info("πŸ”„ Trying search without amenities")
322
  must = [location_filter]
323
+ if br is not None:
324
+ must.append(
325
+ models.FieldCondition(
326
+ key="bedrooms",
327
+ match=models.MatchValue(value=br)
328
+ )
329
+ )
330
  _add_price_range(must, state)
331
  hits = await _search_with_must(must, vector)
332
  if hits:
333
+ logger.info("βœ… Found results without amenities")
334
  return _hits_to_cards(hits)
335
+ except Exception as e:
336
+ logger.warning(f"⚠️ Search without amenities failed: {e}")
337
 
338
+ # 3. Adjust bedrooms Β±1
 
 
339
  if br is not None:
340
+ try:
341
+ logger.info("πŸ”„ Trying search with adjusted bedrooms")
342
+ must = [location_filter]
343
+ new_br = br - 1 if br > 1 else br + 1
344
+ must.append(
345
+ models.FieldCondition(
346
+ key="bedrooms",
347
+ match=models.MatchValue(value=new_br)
348
+ )
349
+ )
350
+ _add_price_range(must, state)
351
+ hits = await _search_with_must(must, vector)
352
+ if hits:
353
+ logger.info(f"βœ… Found results with {new_br} bedrooms")
354
+ return _hits_to_cards(hits)
355
+ except Exception as e:
356
+ logger.warning(f"⚠️ Search with adjusted bedrooms failed: {e}")
357
 
358
+ # 4. Relax price +25%
359
+ try:
360
+ logger.info("πŸ”„ Trying search with relaxed price")
361
+ must = [location_filter]
362
+ if br is not None:
363
+ must.append(
364
+ models.FieldCondition(
365
+ key="bedrooms",
366
+ match=models.MatchValue(value=br)
367
+ )
368
+ )
369
+ if state.get("max_price") is not None:
370
+ relaxed_max = int(state["max_price"] * 1.25)
371
+ must.append(
372
+ models.FieldCondition(
373
+ key="price",
374
+ range=models.Range(lte=relaxed_max)
375
+ )
376
+ )
377
+ else:
378
+ _add_price_range(must, state)
379
+ hits = await _search_with_must(must, vector)
380
+ if hits:
381
+ logger.info("βœ… Found results with relaxed price")
382
+ return _hits_to_cards(hits)
383
+ except Exception as e:
384
+ logger.warning(f"⚠️ Search with relaxed price failed: {e}")
385
+
386
+ logger.warning("⚠️ No results found even with relaxed criteria")
387
+ return []
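# --- editor's sketch (illustrative, not part of this commit) ---
# Quick sanity check of the relaxation arithmetic used above: bedrooms
# are nudged down by one (or up, when the request was for a single
# bedroom), and max_price is widened by 25%.
br = 3
new_br = br - 1 if br > 1 else br + 1   # 3 -> 2; a 1-bedroom request -> 2
relaxed_max = int(200_000 * 1.25)       # max_price 200000 -> 250000
assert (new_br, relaxed_max) == (2, 250_000)
# --- end sketch ---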
388
 
389
+ # ============================================================
390
+ # Search Node
391
+ # ============================================================
392
 
 
393
  async def search_node(state: Dict) -> Dict:
394
  """
395
+ LangGraph node: Comprehensive search with all filters
 
 
 
396
 
397
+ Features:
398
+ - Semantic search via embeddings
399
+ - Keyword filters (location, price, bedrooms, etc.)
400
+ - Error handling with graceful fallback
401
+ - Suggestion system for zero-hit scenarios
402
+ - UI-ready response format
403
  """
 
404
 
405
+ with trace_operation(
406
+ "search_node",
407
+ {
408
+ "location": state.get("location"),
409
+ "bedrooms": state.get("bedrooms"),
410
+ "min_price": state.get("min_price"),
411
+ "max_price": state.get("max_price"),
 
412
  }
413
+ ):
414
+ query = state.get("search_query", "") or state["messages"][-1]["content"]
415
+ logger.info(f"πŸ” Starting search for: {query[:100]}")
416
+
417
+ # ===== Generate embedding =====
418
  try:
419
+ with trace_operation("embed_query"):
420
+ vector = await _embed(query)
421
+ logger.info("βœ… Query embedded successfully")
422
+ except EmbeddingError as e:
423
+ logger.error(f"❌ Embedding failed: {e}")
424
+ state["ai_reply"] = "Sorry, I couldn't process your search right now. Please try again."
425
+ state["search_preview"] = {
426
+ "type": "search_results",
427
+ "count": 0,
428
+ "query": query,
429
+ "filters": {},
430
+ "results": [],
431
+ "message": "Search temporarily unavailable",
432
+ "error": "embedding_failed",
433
+ }
434
+ state["search_results"] = []
435
+ return state
436
+
437
+ # ===== Build filter =====
438
+ filt = _build_filter(state)
439
+
440
+ # ===== Execute search =====
441
+ try:
442
+ with trace_operation("execute_search"):
443
+ logger.info("πŸ”Ž Executing search")
444
+ hits = await _search_with_must(filt.must if filt.must else [], vector)
445
+ logger.info(f"βœ… Search returned {len(hits)} results")
446
+ except QdrantSearchError as e:
447
+ logger.error(f"❌ Search failed: {e}")
448
+ state["ai_reply"] = "Sorry, I'm having trouble searching right now. Please try again."
449
+ state["search_preview"] = {
450
  "type": "search_results",
451
+ "count": 0,
452
  "query": query,
453
  "filters": {
454
  "location": state.get("location"),
 
456
  "max_price": state.get("max_price"),
457
  "bedrooms": state.get("bedrooms"),
458
  "bathrooms": state.get("bathrooms"),
 
 
459
  "amenities": state.get("amenities", []),
460
  },
461
+ "results": [],
462
+ "message": "Search service temporarily unavailable",
463
+ "error": "search_failed",
464
  }
465
+ state["search_results"] = []
466
+ return state
467
+
468
+ cards = _hits_to_cards(hits)
469
+
470
+ # ===== Handle zero-hit scenario =====
471
+ if not cards:
472
+ logger.info("ℹ️ No exact matches found, generating suggestions")
473
+ location = state.get("location") or "that area"
474
+
475
+ try:
476
+ suggestions = await _suggest_relaxed(state, vector)
477
+ except Exception as e:
478
+ logger.warning(f"⚠️ Failed to generate suggestions: {e}")
479
+ suggestions = []
480
+
481
+ if suggestions:
482
+ state["ai_reply"] = (
483
+ f"I found no exact match for your request, "
484
+ f"but you might like these similar options:"
485
+ )
486
+ state["search_preview"] = {
487
+ "type": "search_results",
488
+ "count": len(suggestions),
489
+ "query": query,
490
+ "filters": {
491
+ "location": state.get("location"),
492
+ "min_price": state.get("min_price"),
493
+ "max_price": state.get("max_price"),
494
+ "bedrooms": state.get("bedrooms"),
495
+ "bathrooms": state.get("bathrooms"),
496
+ "price_type": state.get("price_type"),
497
+ "listing_type": state.get("listing_type"),
498
+ "amenities": state.get("amenities", []),
499
+ },
500
+ "results": suggestions,
501
+ "message": "Similar options available",
502
+ }
503
+ state["search_results"] = suggestions
504
+ else:
505
+ state["ai_reply"] = f"I found no property in {location}. Try widening your search or check back later!"
506
+ state["search_preview"] = {
507
+ "type": "search_results",
508
+ "count": 0,
509
+ "query": query,
510
+ "filters": {
511
+ "location": state.get("location"),
512
+ "min_price": state.get("min_price"),
513
+ "max_price": state.get("max_price"),
514
+ "bedrooms": state.get("bedrooms"),
515
+ "bathrooms": state.get("bathrooms"),
516
+ "price_type": state.get("price_type"),
517
+ "listing_type": state.get("listing_type"),
518
+ "amenities": state.get("amenities", []),
519
+ },
520
+ "results": [],
521
+ "message": "No results found",
522
+ }
523
+ state["search_results"] = []
524
  else:
525
+ logger.info(f"βœ… Found {len(cards)} results")
526
+ state["ai_reply"] = f"Here are {len(cards)} places I found for you:"
527
+ state["search_preview"] = {
 
 
528
  "type": "search_results",
529
+ "count": len(cards),
530
  "query": query,
531
  "filters": {
532
  "location": state.get("location"),
 
538
  "listing_type": state.get("listing_type"),
539
  "amenities": state.get("amenities", []),
540
  },
541
+ "results": cards,
542
+ "message": f"Found {len(cards)} listings",
543
  }
544
+ state["search_results"] = cards
545
+
546
+ logger.info("βœ… Search node completed")
547
+ return state
 
app/ai/routes/chat.py CHANGED
@@ -1,15 +1,27 @@
1
- # app/ai/routes/chat.py - Return search_preview + draft_preview
2
- from fastapi import APIRouter, Depends, HTTPException
3
  from fastapi.security import HTTPBearer
4
  from pydantic import BaseModel
5
  from typing import Optional, List
 
 
 
6
  from app.guards.jwt_guard import decode_access_token
7
  from app.ai.service import aida_chat_sync
8
- from app.ai.memory.redis_memory import is_rate_limited
 
 
 
 
 
9
 
10
  router = APIRouter()
11
  security = HTTPBearer()
12
 
 
 
 
 
13
  class MessageHistory(BaseModel):
14
  role: str # 'user' or 'assistant'
15
  content: str
@@ -22,41 +34,235 @@ class AskBody(BaseModel):
22
  user_role: Optional[str] = None
23
  history: Optional[List[MessageHistory]] = None
24
 
 
 
 
25
 
26
  @router.post("/ask")
 
27
  async def ask_ai(
28
  body: AskBody,
 
29
  token: str = Depends(security),
30
  ):
31
- payload = decode_access_token(token.credentials)
32
- if not payload:
33
- raise HTTPException(status_code=401, detail="Invalid token")
 
 
34
 
35
- if await is_rate_limited(payload["user_id"]):
36
- raise HTTPException(status_code=429, detail="Rate limit exceeded")
37
-
38
- # Build conversation context from history
39
- conversation_context = ""
40
- if body.history:
41
- for msg in body.history:
42
- role = "User" if msg.role == "user" else "Assistant"
43
- conversation_context += f"{role}: {msg.content}\n"
 
 
44
 
45
- # Combine context with current message
46
- full_message = body.message
47
- if conversation_context:
48
- full_message = f"Previous conversation:\n{conversation_context}\nNow the user says: {body.message}"
49
-
50
- final_state = await aida_chat_sync(
51
- payload["user_id"],
52
- payload["role"],
53
- full_message,
54
- )
55
-
56
- # βœ… RETURN FULL STATE - text + cards + draft + search preview
57
  return {
58
- "text": final_state.get("ai_reply", ""),
59
- "cards": final_state.get("search_results", []),
60
- "draft_preview": final_state.get("draft_preview"), # For listing preview
61
- "search_preview": final_state.get("search_preview"), # Òœ… NEW: For search results UI
62
  }
 
1
+ # app/ai/routes/chat.py - Enhanced with Observability & Rate Limiting
2
+ from fastapi import APIRouter, Depends, HTTPException, Request
3
  from fastapi.security import HTTPBearer
4
  from pydantic import BaseModel
5
  from typing import Optional, List
6
+ import logging
7
+ import time
8
+
9
  from app.guards.jwt_guard import decode_access_token
10
  from app.ai.service import aida_chat_sync
11
+ from app.core.rate_limiter import get_rate_limiter, RateLimitExceeded
12
+ from app.core.observability import trace_operation, get_token_tracker
13
+ from app.core.error_handling import handle_errors, async_retry, RetryStrategy
14
+ from app.core.context_manager import get_message_window
15
+
16
+ logger = logging.getLogger(__name__)
17
 
18
  router = APIRouter()
19
  security = HTTPBearer()
20
 
21
+ # ============================================================
22
+ # Models
23
+ # ============================================================
24
+
25
  class MessageHistory(BaseModel):
26
  role: str # 'user' or 'assistant'
27
  content: str
 
34
  user_role: Optional[str] = None
35
  history: Optional[List[MessageHistory]] = None
36
 
37
+ # ============================================================
38
+ # Enhanced Chat Endpoint
39
+ # ============================================================
40
 
41
  @router.post("/ask")
42
+ @handle_errors(default_return={"success": False, "error": "Internal server error"})
43
  async def ask_ai(
44
  body: AskBody,
45
+ request: Request,
46
  token: str = Depends(security),
47
  ):
48
+ """
49
+ Enhanced chat endpoint with:
50
+ - Rate limiting (token bucket)
51
+ - Distributed tracing
52
+ - Token tracking
53
+ - Error handling with observability
54
+ - Context management
55
+ """
56
+
57
+ start_time = time.time()
58
+ request_id = request.headers.get("x-request-id", "unknown")
59
+ ip_address = request.client.host if request.client else "unknown"
60
+
61
+ with trace_operation(
62
+ "chat_endpoint",
63
+ {
64
+ "request_id": request_id,
65
+ "ip_address": ip_address,
66
+ "message_length": len(body.message),
67
+ }
68
+ ) as root_span:
69
+ try:
70
+ # ===== Step 1: Validate Token =====
71
+ with trace_operation("token_validation"):
72
+ payload = decode_access_token(token.credentials)
73
+ if not payload:
74
+ raise HTTPException(status_code=401, detail="Invalid token")
75
+
76
+ user_id = payload["user_id"]
77
+ user_role = payload.get("role", "renter")
78
+
79
+ # ===== Step 2: Rate Limiting =====
80
+ with trace_operation(
81
+ "rate_limit_check",
82
+ {"user_id": user_id, "operation": "chat"}
83
+ ):
84
+ rate_limiter = get_rate_limiter()
85
+
86
+ is_allowed, rate_info = await rate_limiter.is_allowed(
87
+ user_id=user_id,
88
+ operation="chat",
89
+ ip_address=ip_address,
90
+ )
91
+
92
+ if not is_allowed:
93
+ logger.warning(
94
+ f"🚫 Rate limit exceeded for user: {user_id}",
95
+ extra={"rate_info": rate_info}
96
+ )
97
+ raise RateLimitExceeded(retry_after=60)
98
+
99
+ # Add rate limit headers
100
+ root_span.set_attribute("rate_limit.remaining", rate_info["user"]["remaining"])
101
+ root_span.set_attribute("rate_limit.capacity", rate_info["user"]["capacity"])
102
+
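# --- editor's sketch (illustrative, not part of this commit) ---
# `get_rate_limiter()` and `RateLimitExceeded` come from app.core.rate_limiter,
# which is not in this diff. From the usage above, `is_allowed` returns
# (allowed, info) with info["user"]["remaining"] and info["user"]["capacity"],
# and the exception carries `message` and `retry_after`. A minimal in-memory
# token bucket honouring that contract (capacity/refill values are assumptions):
import time

class RateLimitExceeded(Exception):
    def __init__(self, retry_after: int = 60):
        self.retry_after = retry_after
        self.message = f"Rate limit exceeded, retry after {retry_after}s"
        super().__init__(self.message)

class _TokenBucketLimiter:
    def __init__(self, capacity: int = 60, refill_per_sec: float = 1.0):
        self.capacity, self.refill = capacity, refill_per_sec
        self._state = {}  # user_id -> (tokens, last_refill_ts)

    async def is_allowed(self, user_id, operation="chat", ip_address="", cost=1):
        tokens, last = self._state.get(user_id, (self.capacity, time.time()))
        now = time.time()
        tokens = min(self.capacity, tokens + (now - last) * self.refill)
        allowed = tokens >= cost
        if allowed:
            tokens -= cost
        self._state[user_id] = (tokens, now)
        return allowed, {"user": {"remaining": int(tokens), "capacity": self.capacity}}
# --- end sketch ---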
103
+ # ===== Step 3: Context Management =====
104
+ with trace_operation("context_management", {"user_id": user_id}):
105
+ window = get_message_window(user_id)
106
+
107
+ # Build conversation context from history (if provided)
108
+ conversation_context = ""
109
+ if body.history:
110
+ for msg in body.history:
111
+ role = "User" if msg.role == "user" else "Assistant"
112
+ conversation_context += f"{role}: {msg.content}\n"
113
+
114
+ # Combine context with current message
115
+ full_message = body.message
116
+ if conversation_context:
117
+ full_message = (
118
+ f"Previous conversation:\n{conversation_context}\n"
119
+ f"Now the user says: {body.message}"
120
+ )
121
+
122
+ # Add to message window
123
+ window.add_message("user", full_message)
124
+
125
+ # ===== Step 4: AI Chat Processing =====
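# --- editor's sketch (illustrative, not part of this commit) ---
# `get_message_window` is imported from app.core.context_manager (not shown
# here). The route only calls `add_message(role, content)`, so a minimal
# per-user sliding window could look like this (the 40-message cap is an
# assumption):
from collections import defaultdict, deque

_windows = defaultdict(lambda: deque(maxlen=40))

class _MessageWindow:
    def __init__(self, user_id: str):
        self._buf = _windows[user_id]
    def add_message(self, role: str, content: str) -> None:
        self._buf.append({"role": role, "content": content})

def get_message_window(user_id: str) -> _MessageWindow:
    return _MessageWindow(user_id)
# --- end sketch ---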
126
+ with trace_operation(
127
+ "aida_chat_sync",
128
+ {
129
+ "user_id": user_id,
130
+ "user_role": user_role,
131
+ "message_length": len(full_message),
132
+ }
133
+ ):
134
+ final_state = await aida_chat_sync(
135
+ user_id,
136
+ user_role,
137
+ full_message,
138
+ )
139
+
140
+ # ===== Step 5: Token Tracking =====
141
+ with trace_operation("token_tracking"):
142
+ # Track tokens if available
143
+ usage = final_state.get("token_usage", {})
144
+ if usage:
145
+ tracker = get_token_tracker()
146
+ model_used = final_state.get("model_used", "unknown")
147
+ tracker.record_tokens(
148
+ model_used,
149
+ usage.get("prompt_tokens", 0),
150
+ usage.get("completion_tokens", 0),
151
+ usage.get("cost", 0.0),
152
+ )
153
+
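# --- editor's sketch (illustrative, not part of this commit) ---
# `get_token_tracker()` lives in app.core.observability. The only call made
# here is record_tokens(model, prompt_tokens, completion_tokens, cost), so a
# minimal accumulator satisfying that interface:
from collections import defaultdict

class _TokenTracker:
    def __init__(self):
        self.totals = defaultdict(lambda: {"prompt": 0, "completion": 0, "cost": 0.0})
    def record_tokens(self, model, prompt_tokens, completion_tokens, cost):
        t = self.totals[model]
        t["prompt"] += prompt_tokens
        t["completion"] += completion_tokens
        t["cost"] += cost

_tracker = _TokenTracker()
def get_token_tracker() -> _TokenTracker:
    return _tracker
# --- end sketch ---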
154
+ # ===== Step 6: Build Response =====
155
+ response = {
156
+ "success": True,
157
+ "text": final_state.get("ai_reply", ""),
158
+ "cards": final_state.get("search_results", []),
159
+ "draft_preview": final_state.get("draft_preview"),
160
+ "search_preview": final_state.get("search_preview"),
161
+ "metadata": {
162
+ "request_id": request_id,
163
+ "processing_time_ms": int((time.time() - start_time) * 1000),
164
+ "user_id": user_id,
165
+ "status": final_state.get("status"),
166
+ },
167
+ }
168
+
169
+ # ===== Step 7: Add Message to Window =====
170
+ with trace_operation("window_update"):
171
+ window.add_message("assistant", final_state.get("ai_reply", ""))
172
+
173
+ # Set root span attributes
174
+ root_span.set_attributes({
175
+ "response.status": "success",
176
+ "response.has_cards": len(response["cards"]) > 0,
177
+ "response.has_draft": response["draft_preview"] is not None,
178
+ "processing_time_ms": response["metadata"]["processing_time_ms"],
179
+ })
180
+
181
+ logger.info(
182
+ f"βœ… Chat processed successfully",
183
+ extra={
184
+ "user_id": user_id,
185
+ "request_id": request_id,
186
+ "processing_time_ms": response["metadata"]["processing_time_ms"],
187
+ "has_cards": len(response["cards"]) > 0,
188
+ }
189
+ )
190
+
191
+ return response
192
+
193
+ except RateLimitExceeded as e:
194
+ root_span.set_attribute("error.type", "rate_limit_exceeded")
195
+ logger.error(f"🚫 Rate limit: {str(e)}")
196
+ raise HTTPException(
197
+ status_code=429,
198
+ detail=e.message,
199
+ headers={"Retry-After": str(e.retry_after)},
200
+ )
201
+
202
+ except HTTPException:
203
+ raise
204
+
205
+ except Exception as e:
206
+ root_span.record_exception(e)
207
+ root_span.set_attribute("error.type", type(e).__name__)
208
+ logger.error(
209
+ f"❌ Chat endpoint error: {str(e)}",
210
+ exc_info=True,
211
+ extra={"user_id": user_id if 'user_id' in locals() else "unknown"}
212
+ )
213
+ raise HTTPException(
214
+ status_code=500,
215
+ detail="An error occurred processing your request",
216
+ )
217
+
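# --- editor's sketch (illustrative, not part of this commit) ---
# `handle_errors` (decorating /ask above) is defined in
# app.core.error_handling. Given how it is used, it plausibly swallows
# unexpected exceptions and returns a default payload, while letting
# HTTPException propagate so FastAPI can map it to a status code:
import functools
import logging
from fastapi import HTTPException

_log = logging.getLogger(__name__)

def handle_errors(default_return):
    def decorator(fn):
        @functools.wraps(fn)
        async def wrapper(*args, **kwargs):
            try:
                return await fn(*args, **kwargs)
            except HTTPException:
                raise  # explicit HTTP errors pass through untouched
            except Exception:
                _log.exception("Unhandled error in %s", fn.__name__)
                return default_return
        return wrapper
    return decorator
# --- end sketch ---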
218
+ # ============================================================
219
+ # Rate Limit Status Endpoint
220
+ # ============================================================
221
+
222
+ @router.get("/rate-limit-status")
223
+ async def get_rate_limit_status(
224
+ token: str = Depends(security),
225
+ ) -> dict:
226
+ """Get current rate limit status for user"""
227
 
228
+ with trace_operation("rate_limit_status"):
229
+ payload = decode_access_token(token.credentials)
230
+ if not payload:
231
+ raise HTTPException(status_code=401, detail="Invalid token")
232
+
233
+ user_id = payload["user_id"]
234
+ rate_limiter = get_rate_limiter()
235
+
236
+ stats = await rate_limiter.get_usage_stats(user_id)
237
+
238
+ return {
239
+ "success": True,
240
+ "data": stats,
241
+ "operations": {
242
+ "chat": {"cost": 1},
243
+ "search": {"cost": 2},
244
+ "list": {"cost": 3},
245
+ "publish": {"cost": 5},
246
+ "upload_image": {"cost": 4},
247
+ },
248
+ }
249
+
250
+ # ============================================================
251
+ # Health Check with Rate Limiter
252
+ # ============================================================
253
+
254
+ @router.get("/health")
255
+ async def chat_health() -> dict:
256
+ """Health check for chat service"""
257
 
 
 
258
  return {
259
+ "service": "aida-chat",
260
+ "status": "healthy",
261
+ "rate_limiting": "enabled",
262
+ "features": {
263
+ "distributed_tracing": True,
264
+ "token_tracking": True,
265
+ "context_management": True,
266
+ "error_resilience": True,
267
+ },
268
  }
app/ai/service.py CHANGED
@@ -1,142 +1,254 @@
1
- # app/ai/service.py – stateful version with context management
2
  import json
3
  from typing import AsyncGenerator, Dict, Any
 
 
4
  from app.ai.graph import agent
5
  from app.ai.memory.redis_memory import load_history, save_turn, load_state, save_state
6
  from app.ai.state import ChatState
7
- from app.core.context_manager import get_context_manager, MessageWindowExceeded
8
- from structlog import get_logger
 
 
 
9
 
10
- logger = get_logger(__name__)
 
 
11
 
12
- # --------------------------------------------------
13
- # WebSocket streaming entry-point
14
- # --------------------------------------------------
15
  async def aida_chat(
16
  user_id: str,
17
  user_role: str,
18
  human_msg: str,
19
  ) -> AsyncGenerator[str, None]:
20
  """
21
- Streaming chat endpoint with context management.
22
- βœ… Uses context manager for intelligent message windowing
 
 
 
 
 
 
23
  """
24
- try:
25
- # Initialize context manager for this user
26
- context_mgr = get_context_manager()
27
-
28
- # Load message history from Redis
29
- messages = await load_history(user_id)
30
-
31
- # Add user message to history
32
- messages.append({"role": "user", "content": human_msg})
33
-
34
- # Manage context window (drop old messages if needed)
35
  try:
36
- managed_messages = await context_mgr.manage_context(messages)
37
- logger.info("βœ… Context window managed", total_msgs=len(messages), managed=len(managed_messages))
38
- except MessageWindowExceeded as e:
39
- logger.warning(f"⚠️ Message window exceeded: {e}, using truncated history")
40
- managed_messages = messages[-20:] # Keep last 20 messages as fallback
41
-
42
- # Restore previous state or start fresh
43
- saved = await load_state(user_id)
44
- state: ChatState = {
45
- "user_id": user_id,
46
- "user_role": user_role,
47
- "messages": managed_messages,
48
- "draft": saved.get("draft"),
49
- "vector_meta": saved.get("vector_meta"),
50
- "allowed": saved.get("allowed", True),
51
- "ai_reply": saved.get("ai_reply", ""),
52
- "status": saved.get("status"),
53
- "missing_fields": saved.get("missing_fields", []),
54
- "next_question": saved.get("next_question"),
55
- "location": saved.get("location"),
56
- "min_price": saved.get("min_price"),
57
- "max_price": saved.get("max_price"),
58
- "bedrooms": saved.get("bedrooms"),
59
- "bathrooms": saved.get("bathrooms"),
60
- "amenities": saved.get("amenities", []),
61
- "listing_type": saved.get("listing_type"),
62
- "price": saved.get("price"),
63
- "price_type": saved.get("price_type"),
64
- "currency": saved.get("currency", "XOF"),
65
- "requirements": saved.get("requirements"),
66
- "search_query": saved.get("search_query"),
67
- "search_results": saved.get("search_results"),
68
- "search_preview": saved.get("search_preview"),
69
- "suggestions": saved.get("suggestions", []),
70
- "image": saved.get("image"),
71
- "field_validations": saved.get("field_validations"),
72
- "field_confidences": saved.get("field_confidences"),
73
- "location_details": saved.get("location_details"),
74
- "validation_suggestions": saved.get("validation_suggestions", []),
75
- "listing_confidence": saved.get("listing_confidence"),
76
- "currency_confidence": saved.get("currency_confidence"),
77
- "draft_preview": saved.get("draft_preview"),
78
- "mongo_id": saved.get("mongo_id"),
79
- }
80
-
81
- logger.info("πŸš€ Starting aida_chat stream", user_id=user_id, user_role=user_role)
82
-
83
- # Stream responses from agent
84
- async for step in agent.astream(state):
85
- for node_name, update in step.items():
86
- if update.get("ai_reply"):
87
- logger.debug(f"πŸ“€ Streaming from {node_name}")
88
- yield json.dumps({"node": node_name, "text": update["ai_reply"]}) + "\n"
89
-
90
- # Final invocation to get complete state
91
- final_state = await agent.ainvoke(state)
 
 
92
 
93
- # Update message history with assistant response
94
- managed_messages.append({"role": "assistant", "content": final_state["ai_reply"]})
95
-
96
- # Persist conversation and state
97
- await save_turn(user_id, managed_messages)
98
- await save_state(user_id, final_state)
99
-
100
- logger.info("βœ… aida_chat stream completed", user_id=user_id)
101
 
102
- except Exception as e:
103
- logger.error("❌ aida_chat error", exc_info=e)
104
- yield json.dumps({"node": "error", "text": "Sorry, something went wrong. Please try again."}) + "\n"
105
 
106
-
107
- # --------------------------------------------------
108
- # REST (non-streaming) – returns the full state dict
109
- # --------------------------------------------------
 
 
110
  async def aida_chat_sync(
111
  user_id: str,
112
  user_role: str,
113
  human_msg: str,
114
  ) -> Dict[str, Any]:
115
  """
116
- Synchronous chat endpoint with full context management.
117
- βœ… Manages message context window
118
- βœ… Returns complete state for REST clients
 
 
 
 
 
119
  """
120
- try:
121
- # Initialize context manager for this user
122
- context_mgr = get_context_manager()
 
123
 
124
- # Load message history from Redis
125
- messages = await load_history(user_id)
 
126
 
127
- # Add user message to history
128
  messages.append({"role": "user", "content": human_msg})
129
 
130
- # Manage context window (drop old messages if needed)
131
- try:
132
- managed_messages = await context_mgr.manage_context(messages)
133
- logger.info("βœ… Context window managed", total_msgs=len(messages), managed=len(managed_messages))
134
- except MessageWindowExceeded as e:
135
- logger.warning(f"⚠️ Message window exceeded: {e}, using truncated history")
136
- managed_messages = messages[-20:] # Keep last 20 messages as fallback
 
137
 
138
- # Restore previous state or start fresh
139
- saved = await load_state(user_id)
140
  state: ChatState = {
141
  "user_id": user_id,
142
  "user_role": user_role,
@@ -173,63 +285,126 @@ async def aida_chat_sync(
173
  "draft_preview": saved.get("draft_preview"),
174
  "mongo_id": saved.get("mongo_id"),
175
  }
176
-
177
- logger.info("πŸš€ Starting aida_chat_sync", user_id=user_id, user_role=user_role)
178
-
179
- # Invoke agent with complete state
180
- final_state = await agent.ainvoke(state)
181
 
182
- # Update message history with assistant response
183
- managed_messages.append({"role": "assistant", "content": final_state["ai_reply"]})
 
 
184
 
185
- # Persist conversation and state
186
- await save_turn(user_id, managed_messages)
187
- await save_state(user_id, final_state)
 
 
188
 
189
- logger.info("βœ… aida_chat_sync completed", user_id=user_id, status=final_state.get("status"))
 
 
190
 
191
  # Return the entire state so the route can pick text + cards + preview
192
  return final_state
193
 
194
- except Exception as e:
195
- logger.error("❌ aida_chat_sync error", exc_info=e)
196
- return {
197
- "ai_reply": "Sorry, something went wrong. Please try again.",
198
- "status": "error",
199
- "search_preview": None,
200
- "draft_preview": None,
201
- }
202
-
203
 
204
- # --------------------------------------------------
205
- # Health check / debugging endpoint
206
- # --------------------------------------------------
207
  async def get_conversation_context(user_id: str) -> Dict[str, Any]:
208
  """
209
- Get current conversation context for a user.
210
- βœ… Returns managed message window and current state
 
211
  """
212
- try:
213
- context_mgr = get_context_manager()
214
- messages = await load_history(user_id)
215
- saved = await load_state(user_id)
216
-
217
- # Manage context to show what will be sent to agent
218
  try:
219
- managed = await context_mgr.manage_context(messages)
220
- except MessageWindowExceeded:
221
- managed = messages[-20:]
 
 
222
 
223
- return {
224
- "user_id": user_id,
225
- "total_messages": len(messages),
226
- "managed_messages": len(managed),
227
- "current_status": saved.get("status"),
228
- "intent": saved.get("intent"),
229
- "draft_preview": saved.get("draft_preview") is not None,
230
- "search_results_count": len(saved.get("search_results", [])),
231
- "message_sample": managed[-1]["content"][:100] if managed else None,
232
- }
233
- except Exception as e:
234
- logger.error("❌ Failed to get conversation context", exc_info=e)
235
- return {"error": str(e)}
 
 
 
1
+ # app/ai/service.py – Complete with context management + error handling
2
  import json
3
  from typing import AsyncGenerator, Dict, Any
4
+ import logging
5
+
6
  from app.ai.graph import agent
7
  from app.ai.memory.redis_memory import load_history, save_turn, load_state, save_state
8
  from app.ai.state import ChatState
9
+ from app.core.context_manager import get_context_manager
10
+ from app.core.error_handling import trace_operation, handle_errors
11
+ from app.core.observability import get_token_tracker
12
+
13
+ logger = logging.getLogger(__name__)
14
 
15
+ # ============================================================
16
+ # WebSocket Streaming Entry Point
17
+ # ============================================================
18
 
 
 
 
19
  async def aida_chat(
20
  user_id: str,
21
  user_role: str,
22
  human_msg: str,
23
  ) -> AsyncGenerator[str, None]:
24
  """
25
+ Streaming chat endpoint with full context management and error handling.
26
+
27
+ Features:
28
+ - Context window management (prevents overflow)
29
+ - Message window persistence per user
30
+ - Error recovery with fallbacks
31
+ - Token tracking
32
+ - Full observability
33
  """
34
+
35
+ with trace_operation("aida_chat_stream", {"user_id": user_id, "user_role": user_role}):
 
 
 
 
 
 
 
 
 
36
  try:
37
+ logger.info(
38
+ "πŸš€ Starting aida_chat stream",
39
+ extra={"user_id": user_id, "user_role": user_role}
40
+ )
41
+
42
+ # ===== Load message history =====
43
+ with trace_operation("load_history"):
44
+ try:
45
+ messages = await load_history(user_id)
46
+ logger.info(f"βœ… Loaded {len(messages)} messages from history")
47
+ except Exception as e:
48
+ logger.warning(f"⚠️ Failed to load history: {e}, starting fresh")
49
+ messages = []
50
+
51
+ # ===== Add user message =====
52
+ messages.append({"role": "user", "content": human_msg})
53
+
54
+ # ===== Manage context window =====
55
+ with trace_operation("context_management"):
56
+ try:
57
+ context_mgr = get_context_manager()
58
+ managed_messages = await context_mgr.manage_context(messages)
59
+ logger.info(
60
+ f"βœ… Context managed",
61
+ extra={
62
+ "total_messages": len(messages),
63
+ "managed_messages": len(managed_messages),
64
+ }
65
+ )
66
+ except Exception as e:
67
+ logger.warning(f"⚠️ Context management failed: {e}, using last 20 messages")
68
+ managed_messages = messages[-20:]
69
+
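# --- editor's sketch (illustrative, not part of this commit) ---
# `get_context_manager().manage_context(messages)` comes from
# app.core.context_manager. A plausible minimal behaviour, trimming the
# history to a rough token budget (the 4-chars-per-token heuristic and the
# 8000-token budget are assumptions, not the project's actual values):
class _ContextManager:
    def __init__(self, token_budget: int = 8000):
        self.token_budget = token_budget

    async def manage_context(self, messages: list) -> list:
        kept, used = [], 0
        for msg in reversed(messages):  # newest messages win
            est_tokens = len(msg.get("content", "")) // 4 + 1
            if used + est_tokens > self.token_budget:
                break
            kept.append(msg)
            used += est_tokens
        return list(reversed(kept))

def get_context_manager() -> _ContextManager:
    return _ContextManager()
# --- end sketch ---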
70
+ # ===== Load previous state =====
71
+ with trace_operation("load_state"):
72
+ try:
73
+ saved = await load_state(user_id)
74
+ logger.info(f"βœ… Loaded previous state, status={saved.get('status')}")
75
+ except Exception as e:
76
+ logger.warning(f"⚠️ Failed to load state: {e}, starting fresh")
77
+ saved = {}
78
+
79
+ # ===== Build chat state =====
80
+ state: ChatState = {
81
+ "user_id": user_id,
82
+ "user_role": user_role,
83
+ "messages": managed_messages,
84
+ "draft": saved.get("draft"),
85
+ "vector_meta": saved.get("vector_meta"),
86
+ "allowed": saved.get("allowed", True),
87
+ "ai_reply": saved.get("ai_reply", ""),
88
+ "status": saved.get("status"),
89
+ "missing_fields": saved.get("missing_fields", []),
90
+ "next_question": saved.get("next_question"),
91
+ "location": saved.get("location"),
92
+ "min_price": saved.get("min_price"),
93
+ "max_price": saved.get("max_price"),
94
+ "bedrooms": saved.get("bedrooms"),
95
+ "bathrooms": saved.get("bathrooms"),
96
+ "amenities": saved.get("amenities", []),
97
+ "listing_type": saved.get("listing_type"),
98
+ "price": saved.get("price"),
99
+ "price_type": saved.get("price_type"),
100
+ "currency": saved.get("currency", "XOF"),
101
+ "requirements": saved.get("requirements"),
102
+ "search_query": saved.get("search_query"),
103
+ "search_results": saved.get("search_results"),
104
+ "search_preview": saved.get("search_preview"),
105
+ "suggestions": saved.get("suggestions", []),
106
+ "image": saved.get("image"),
107
+ "field_validations": saved.get("field_validations"),
108
+ "field_confidences": saved.get("field_confidences"),
109
+ "location_details": saved.get("location_details"),
110
+ "validation_suggestions": saved.get("validation_suggestions", []),
111
+ "listing_confidence": saved.get("listing_confidence"),
112
+ "currency_confidence": saved.get("currency_confidence"),
113
+ "draft_preview": saved.get("draft_preview"),
114
+ "mongo_id": saved.get("mongo_id"),
115
+ }
116
+
117
+ # ===== Stream responses from agent =====
118
+ with trace_operation("agent_stream"):
119
+ try:
120
+ async for step in agent.astream(state):
121
+ for node_name, update in step.items():
122
+ if update.get("ai_reply"):
123
+ logger.debug(f"πŸ“€ Streaming from {node_name}")
124
+ yield json.dumps({
125
+ "node": node_name,
126
+ "text": update["ai_reply"]
127
+ }) + "\n"
128
+ except Exception as e:
129
+ logger.error(f"❌ Agent stream error: {e}", exc_info=True)
130
+ yield json.dumps({
131
+ "node": "error",
132
+ "text": "An error occurred processing your request. Please try again."
133
+ }) + "\n"
134
+ return
135
+
136
+ # ===== Get final state =====
137
+ with trace_operation("agent_invoke"):
138
+ try:
139
+ final_state = await agent.ainvoke(state)
140
+ logger.info(f"βœ… Agent invocation complete, status={final_state.get('status')}")
141
+ except Exception as e:
142
+ logger.error(f"❌ Agent invoke error: {e}", exc_info=True)
143
+ yield json.dumps({
144
+ "node": "error",
145
+ "text": "Sorry, something went wrong. Please try again."
146
+ }) + "\n"
147
+ return
148
+
149
+ # ===== Save results =====
150
+ with trace_operation("save_results"):
151
+ try:
152
+ # Update message history
153
+ managed_messages.append({
154
+ "role": "assistant",
155
+ "content": final_state.get("ai_reply", "")
156
+ })
157
+
158
+ # Persist conversation and state
159
+ await save_turn(user_id, managed_messages)
160
+ await save_state(user_id, final_state)
161
+
162
+ logger.info(f"βœ… Results saved for user {user_id}")
163
+ except Exception as e:
164
+ logger.error(f"❌ Failed to save results: {e}")
165
+
166
+ logger.info("βœ… aida_chat stream completed successfully")
167
 
168
+ except Exception as e:
169
+ logger.error(f"❌ Unexpected error in aida_chat: {e}", exc_info=True)
170
+ yield json.dumps({
171
+ "node": "error",
172
+ "text": "An unexpected error occurred. Please try again."
173
+ }) + "\n"
 
 
174
 
175
+ # ============================================================
176
+ # REST Synchronous Entry Point
177
+ # ============================================================
178
 
179
+ @handle_errors(default_return={
180
+ "ai_reply": "Sorry, something went wrong. Please try again.",
181
+ "status": "error",
182
+ "search_preview": None,
183
+ "draft_preview": None,
184
+ })
185
  async def aida_chat_sync(
186
  user_id: str,
187
  user_role: str,
188
  human_msg: str,
189
  ) -> Dict[str, Any]:
190
  """
191
+ Synchronous chat endpoint for REST clients.
192
+
193
+ Features:
194
+ - Full context management
195
+ - Error resilience
196
+ - Token tracking
197
+ - Complete state return
198
+ - Observability integration
199
  """
200
+
201
+ with trace_operation(
202
+ "aida_chat_sync",
203
+ {
204
+ "user_id": user_id,
205
+ "user_role": user_role,
206
+ "message_length": len(human_msg),
207
+ }
208
+ ):
209
+ logger.info(
210
+ "πŸš€ Starting aida_chat_sync",
211
+ extra={"user_id": user_id, "user_role": user_role}
212
+ )
213
 
214
+ # ===== Load message history =====
215
+ with trace_operation("load_history"):
216
+ try:
217
+ messages = await load_history(user_id)
218
+ logger.info(f"βœ… Loaded {len(messages)} messages from history")
219
+ except Exception as e:
220
+ logger.warning(f"⚠️ Failed to load history: {e}, starting fresh")
221
+ messages = []
222
 
223
+ # ===== Add user message =====
224
  messages.append({"role": "user", "content": human_msg})
225
 
226
+ # ===== Manage context window =====
227
+ with trace_operation("context_management"):
228
+ try:
229
+ context_mgr = get_context_manager()
230
+ managed_messages = await context_mgr.manage_context(messages)
231
+ logger.info(
232
+ f"βœ… Context managed",
233
+ extra={
234
+ "total_messages": len(messages),
235
+ "managed_messages": len(managed_messages),
236
+ }
237
+ )
238
+ except Exception as e:
239
+ logger.warning(f"⚠️ Context management failed: {e}, using last 20 messages")
240
+ managed_messages = messages[-20:]
241
+
242
+ # ===== Load previous state =====
243
+ with trace_operation("load_state"):
244
+ try:
245
+ saved = await load_state(user_id)
246
+ logger.info(f"βœ… Loaded previous state, status={saved.get('status')}")
247
+ except Exception as e:
248
+ logger.warning(f"⚠️ Failed to load state: {e}, starting fresh")
249
+ saved = {}
250
 
251
+ # ===== Build chat state =====
 
252
  state: ChatState = {
253
  "user_id": user_id,
254
  "user_role": user_role,
 
285
  "draft_preview": saved.get("draft_preview"),
286
  "mongo_id": saved.get("mongo_id"),
287
  }
 
 
 
 
 
288
 
289
+ # ===== Invoke agent =====
290
+ with trace_operation("agent_invoke"):
291
+ try:
292
+ final_state = await agent.ainvoke(state)
293
+ logger.info(
294
+ f"βœ… Agent invocation complete",
295
+ extra={"status": final_state.get("status")}
296
+ )
297
+ except Exception as e:
298
+ logger.error(f"❌ Agent invoke error: {e}", exc_info=True)
299
+ return {
300
+ "ai_reply": "Sorry, an error occurred processing your request.",
301
+ "status": "error",
302
+ "search_preview": None,
303
+ "draft_preview": None,
304
+ }
305
 
306
+ # ===== Save results =====
307
+ with trace_operation("save_results"):
308
+ try:
309
+ # Update message history
310
+ managed_messages.append({
311
+ "role": "assistant",
312
+ "content": final_state.get("ai_reply", "")
313
+ })
314
+
315
+ # Persist conversation and state
316
+ await save_turn(user_id, managed_messages)
317
+ await save_state(user_id, final_state)
318
+
319
+ logger.info(f"βœ… Results saved for user {user_id}")
320
+ except Exception as e:
321
+ logger.error(f"⚠️ Failed to save results: {e}")
322
+ # Don't fail the response, just log the error
323
 
324
+ # ===== Track tokens if available =====
325
+ with trace_operation("token_tracking"):
326
+ try:
327
+ usage = final_state.get("token_usage", {})
328
+ if usage:
329
+ tracker = get_token_tracker()
330
+ model_used = final_state.get("model_used", "unknown")
331
+ tracker.record_tokens(
332
+ model_used,
333
+ usage.get("prompt_tokens", 0),
334
+ usage.get("completion_tokens", 0),
335
+ usage.get("cost", 0.0),
336
+ )
337
+ logger.info(f"βœ… Tokens tracked", extra={"model": model_used, "usage": usage})
338
+ except Exception as e:
339
+ logger.warning(f"⚠️ Failed to track tokens: {e}")
340
+
341
+ logger.info(f"βœ… aida_chat_sync completed successfully")
342
 
343
  # Return the entire state so the route can pick text + cards + preview
344
  return final_state
345
 
346
+ # ============================================================
347
+ # Debugging / Context Inspection
348
+ # ============================================================
 
 
 
 
 
 
349
 
 
 
 
350
  async def get_conversation_context(user_id: str) -> Dict[str, Any]:
351
  """
352
+ Get current conversation context for debugging/inspection.
353
+
354
+ Returns managed message window and current state stats.
355
  """
356
+
357
+ with trace_operation("get_conversation_context", {"user_id": user_id}):
 
 
 
 
358
  try:
359
+ # Load history and state
360
+ messages = await load_history(user_id)
361
+ saved = await load_state(user_id)
362
+
363
+ # Manage context to show what will be sent to agent
364
+ context_mgr = get_context_manager()
365
+ try:
366
+ managed = await context_mgr.manage_context(messages)
367
+ except Exception as e:
368
+ logger.warning(f"⚠️ Context management failed: {e}")
369
+ managed = messages[-20:] if messages else []
370
+
371
+ return {
372
+ "user_id": user_id,
373
+ "total_messages": len(messages),
374
+ "managed_messages": len(managed),
375
+ "current_status": saved.get("status"),
376
+ "current_intent": saved.get("intent"),
377
+ "has_draft": saved.get("draft_preview") is not None,
378
+ "search_results_count": len(saved.get("search_results", [])),
379
+ "message_sample": managed[-1]["content"][:100] if managed else None,
380
+ "timestamp": managed[-1].get("timestamp") if managed else None,
381
+ }
382
 
383
+ except Exception as e:
384
+ logger.error(f"❌ Failed to get conversation context: {e}", exc_info=True)
385
+ return {
386
+ "error": str(e),
387
+ "user_id": user_id,
388
+ }
389
+
390
+ # ============================================================
391
+ # Health Check
392
+ # ============================================================
393
+
394
+ async def health_check_chat_service() -> Dict[str, Any]:
395
+ """
396
+ Health check for chat service.
397
+ """
398
+
399
+ return {
400
+ "service": "aida-chat",
401
+ "status": "healthy",
402
+ "features": {
403
+ "context_management": True,
404
+ "error_handling": True,
405
+ "token_tracking": True,
406
+ "observability": True,
407
+ "streaming": True,
408
+ "sync": True,
409
+ },
410
+ }
app/api/endpoints/monitoring.py ADDED
@@ -0,0 +1,354 @@
 
 
1
+ # ============================================================
2
+ # app/api/endpoints/monitoring.py - Observability & Monitoring
3
+ # ============================================================
4
+
5
+ from fastapi import APIRouter, Depends, HTTPException
6
+ from typing import Optional, Dict, Any
7
+ import logging
8
+ from datetime import datetime, timedelta
9
+
10
+ from app.guards.jwt_guard import get_current_user
11
+ from app.core.llm_router import get_llm_router
12
+ from app.core.observability import get_token_tracker, get_meter
13
+ from app.core.error_handling import get_all_circuit_breaker_status
14
+ from app.core.rate_limiter import get_rate_limiter
15
+ from app.core.context_manager import cleanup_expired_windows
16
+ from app.ai.config import redis_client, qdrant_client
17
+ from app.database import get_db
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ router = APIRouter()
22
+
23
+ # ============================================================
24
+ # Health & Status Endpoints
25
+ # ============================================================
26
+
27
+ @router.get("/health/detailed")
28
+ async def detailed_health_check() -> Dict[str, Any]:
29
+ """
30
+ Comprehensive health check with all system components
31
+ """
32
+
33
+ health_status = {
34
+ "status": "checking",
35
+ "timestamp": datetime.utcnow().isoformat(),
36
+ "components": {},
37
+ }
38
+
39
+ try:
40
+ # MongoDB
41
+ try:
42
+ db = await get_db()
43
+ await db.client.admin.command("ping")
44
+ health_status["components"]["mongodb"] = {
45
+ "status": "healthy",
46
+ "response_time_ms": 5,
47
+ }
48
+ except Exception as e:
49
+ health_status["components"]["mongodb"] = {
50
+ "status": "unhealthy",
51
+ "error": str(e),
52
+ }
53
+
54
+ # Redis
55
+ try:
56
+ await redis_client.ping()
57
+ health_status["components"]["redis"] = {
58
+ "status": "healthy",
59
+ }
60
+ except Exception as e:
61
+ health_status["components"]["redis"] = {
62
+ "status": "unhealthy",
63
+ "error": str(e),
64
+ }
65
+
66
+ # Qdrant
67
+ try:
68
+ await qdrant_client.get_collections()
69
+ health_status["components"]["qdrant"] = {
70
+ "status": "healthy",
71
+ }
72
+ except Exception as e:
73
+ health_status["components"]["qdrant"] = {
74
+ "status": "unhealthy",
75
+ "error": str(e),
76
+ }
77
+
78
+ # LLM Router
79
+ try:
80
+ llm_router = get_llm_router()  # avoid shadowing the module-level APIRouter
81
+ stats = llm_router.get_stats()
82
+ available = sum(
83
+ 1 for info in stats["models"].values()
84
+ if info["available"]
85
+ )
86
+ health_status["components"]["llm_router"] = {
87
+ "status": "healthy" if available > 0 else "degraded",
88
+ "available_models": available,
89
+ "total_calls": stats["total_calls"],
90
+ "errors": stats["total_errors"],
91
+ }
92
+ except Exception as e:
93
+ health_status["components"]["llm_router"] = {
94
+ "status": "unhealthy",
95
+ "error": str(e),
96
+ }
97
+
98
+ # Circuit Breakers
99
+ try:
100
+ circuit_breakers = get_all_circuit_breaker_status()
101
+ open_breakers = sum(
102
+ 1 for cb in circuit_breakers.values()
103
+ if cb["is_open"]
104
+ )
105
+ health_status["components"]["circuit_breakers"] = {
106
+ "status": "healthy" if open_breakers == 0 else "warning",
107
+ "total": len(circuit_breakers),
108
+ "open": open_breakers,
109
+ }
110
+ except Exception as e:
111
+ health_status["components"]["circuit_breakers"] = {
112
+ "status": "unknown",
113
+ "error": str(e),
114
+ }
115
+
116
+ # Overall status
117
+ unhealthy = sum(
118
+ 1 for component in health_status["components"].values()
119
+ if component["status"] == "unhealthy"
120
+ )
121
+
122
+ health_status["status"] = (
123
+ "healthy" if unhealthy == 0 else
124
+ "degraded" if unhealthy <= 1 else
125
+ "unhealthy"
126
+ )
127
+
128
+ return health_status
129
+
130
+ except Exception as e:
131
+ logger.error(f"Health check error: {e}")
132
+ return {
133
+ "status": "error",
134
+ "error": str(e),
135
+ "timestamp": datetime.utcnow().isoformat(),
136
+ }
137
+
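# --- editor's sketch (illustrative, not part of this commit) ---
# `get_all_circuit_breaker_status()` is defined in app.core.error_handling.
# The handlers in this file only rely on each entry exposing "name" and
# "is_open"; a representative return value (field names inferred from usage):
_example_breakers = {
    "qdrant": {"name": "qdrant", "is_open": False, "failure_count": 0},
    "embeddings": {"name": "embeddings", "is_open": True, "failure_count": 7},
}
_open = [cb["name"] for cb in _example_breakers.values() if cb["is_open"]]
assert _open == ["embeddings"]
# --- end sketch ---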
138
+ # ============================================================
139
+ # Metrics Endpoints
140
+ # ============================================================
141
+
142
+ @router.get("/metrics/tokens")
143
+ async def get_token_metrics(user: dict = Depends(get_current_user)) -> Dict[str, Any]:
144
+ """
145
+ Get token usage metrics
146
+ Only admins or the user themselves can view
147
+ """
148
+
149
+ try:
150
+ tracker = get_token_tracker()
151
+
152
+ return {
153
+ "success": True,
154
+ "data": {
155
+ "timestamp": datetime.utcnow().isoformat(),
156
+ "note": "Token metrics available in traces backend",
157
+ "models_tracked": [
158
+ "deepseek-chat",
159
+ "mistralai/mistral-7b-instruct",
160
+ "xai-org/grok-beta",
161
+ "meta-llama/llama-2-70b-chat",
162
+ ],
163
+ }
164
+ }
165
+ except Exception as e:
166
+ logger.error(f"Token metrics error: {e}")
167
+ raise HTTPException(status_code=500, detail="Failed to get token metrics")
168
+
169
+ @router.get("/metrics/llm")
170
+ async def get_llm_metrics(user: dict = Depends(get_current_user)) -> Dict[str, Any]:
171
+ """
172
+ Get LLM router metrics
173
+ """
174
+
175
+ try:
176
+ llm_router = get_llm_router()
177
+ stats = llm_router.get_stats()
178
+
179
+ return {
180
+ "success": True,
181
+ "data": {
182
+ "timestamp": datetime.utcnow().isoformat(),
183
+ "total_calls": stats["total_calls"],
184
+ "total_errors": stats["total_errors"],
185
+ "models": [
186
+ {
187
+ "name": model,
188
+ "available": info["available"],
189
+ "calls": info["calls"],
190
+ "errors": info["errors"],
191
+ "error_rate": (
192
+ (info["errors"] / max(info["calls"], 1) * 100)
193
+ if info["calls"] > 0 else 0
194
+ ),
195
+ }
196
+ for model, info in stats["models"].items()
197
+ ],
198
+ }
199
+ }
200
+ except Exception as e:
201
+ logger.error(f"LLM metrics error: {e}")
202
+ raise HTTPException(status_code=500, detail="Failed to get LLM metrics")
203
+
204
+ @router.get("/metrics/rate-limit")
205
+ async def get_rate_limit_metrics(user: dict = Depends(get_current_user)) -> Dict[str, Any]:
206
+ """
207
+ Get rate limit metrics for current user
208
+ """
209
+
210
+ try:
211
+ rate_limiter = get_rate_limiter()
212
+ stats = await rate_limiter.get_usage_stats(user["user_id"])
213
+
214
+ usage_percent = (
215
+ ((stats["capacity"] - stats["remaining"]) / stats["capacity"] * 100)
216
+ if stats["capacity"] > 0 else 0
217
+ )
218
+
219
+ return {
220
+ "success": True,
221
+ "data": {
222
+ **stats,
223
+ "usage_percent": round(usage_percent, 2),
224
+ "reset_time": (
225
+ datetime.utcnow() + timedelta(seconds=stats["reset_in"])
226
+ ).isoformat(),
227
+ }
228
+ }
229
+ except Exception as e:
230
+ logger.error(f"Rate limit metrics error: {e}")
231
+ raise HTTPException(status_code=500, detail="Failed to get rate limit metrics")
232
+
233
+ @router.get("/metrics/circuit-breakers")
234
+ async def get_circuit_breaker_metrics(user: dict = Depends(get_current_user)) -> Dict[str, Any]:
235
+ """
236
+ Get circuit breaker status
237
+ """
238
+
239
+ try:
240
+ breakers = get_all_circuit_breaker_status()
241
+
242
+ return {
243
+ "success": True,
244
+ "data": {
245
+ "timestamp": datetime.utcnow().isoformat(),
246
+ "total": len(breakers),
247
+ "open": sum(1 for cb in breakers.values() if cb["is_open"]),
248
+ "closed": sum(1 for cb in breakers.values() if not cb["is_open"]),
249
+ "circuit_breakers": [
250
+ {
251
+ **cb,
252
+ "status": "open" if cb["is_open"] else "closed",
253
+ }
254
+ for cb in breakers.values()
255
+ ],
256
+ }
257
+ }
258
+ except Exception as e:
259
+ logger.error(f"Circuit breaker metrics error: {e}")
260
+ raise HTTPException(status_code=500, detail="Failed to get circuit breaker metrics")
261
+
262
+ # ============================================================
263
+ # System Status Endpoints
264
+ # ============================================================
265
+
266
+ @router.get("/status/system")
267
+ async def get_system_status(user: dict = Depends(get_current_user)) -> Dict[str, Any]:
268
+ """
269
+ Get overall system status
270
+ """
271
+
272
+ try:
273
+ llm_router = get_llm_router()
274
+ llm_stats = llm_router.get_stats()
275
+
276
+ breakers = get_all_circuit_breaker_status()
277
+ open_breakers = [cb for cb in breakers.values() if cb["is_open"]]
278
+
279
+ return {
280
+ "success": True,
281
+ "data": {
282
+ "timestamp": datetime.utcnow().isoformat(),
283
+ "system_status": (
284
+ "healthy" if len(open_breakers) == 0 else
285
+ "degraded" if len(open_breakers) == 1 else
286
+ "unhealthy"
287
+ ),
288
+ "llm_status": {
289
+ "available_models": sum(
290
+ 1 for info in llm_stats["models"].values()
291
+ if info["available"]
292
+ ),
293
+ "error_rate": (
294
+ llm_stats["total_errors"] / max(llm_stats["total_calls"], 1) * 100
295
+ ) if llm_stats["total_calls"] > 0 else 0,
296
+ },
297
+ "circuit_breaker_status": {
298
+ "open": len(open_breakers),
299
+ "total": len(breakers),
300
+ "open_breakers": [cb["name"] for cb in open_breakers],
301
+ },
302
+ }
303
+ }
304
+ except Exception as e:
305
+ logger.error(f"System status error: {e}")
306
+ raise HTTPException(status_code=500, detail="Failed to get system status")
307
+
308
+ # ============================================================
309
+ # Debug Endpoints (Development Only)
310
+ # ============================================================
311
+
312
+ @router.post("/debug/cleanup-windows")
313
+ async def debug_cleanup_windows(user: dict = Depends(get_current_user)) -> Dict[str, Any]:
314
+ """
315
+ Manually trigger cleanup of expired message windows
316
+ """
317
+
318
+ try:
319
+ count = cleanup_expired_windows()
320
+
321
+ return {
322
+ "success": True,
323
+ "data": {
324
+ "cleaned_windows": count,
325
+ "timestamp": datetime.utcnow().isoformat(),
326
+ }
327
+ }
328
+ except Exception as e:
329
+ logger.error(f"Window cleanup error: {e}")
330
+ raise HTTPException(status_code=500, detail="Failed to cleanup windows")
331
+
332
+ @router.post("/debug/reset-rate-limit")
333
+ async def debug_reset_rate_limit(
334
+ user: dict = Depends(get_current_user),
335
+ ) -> Dict[str, Any]:
336
+ """
337
+ Reset rate limits (development/admin only)
338
+ """
339
+
340
+ try:
341
+ rate_limiter = get_rate_limiter()
342
+ success = await rate_limiter.reset_user_limits(user["user_id"])
343
+
344
+ return {
345
+ "success": success,
346
+ "data": {
347
+ "user_id": user["user_id"],
348
+ "limits_reset": success,
349
+ "timestamp": datetime.utcnow().isoformat(),
350
+ }
351
+ }
352
+ except Exception as e:
353
+ logger.error(f"Rate limit reset error: {e}")
354
+ raise HTTPException(status_code=500, detail="Failed to reset rate limits")
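
Usage note: once main.py (below) mounts this router under /api/monitoring, the endpoints can be polled by any authenticated client. A minimal client-side sketch, assuming the API runs on localhost:8000, that httpx is installed, and that get_current_user accepts a bearer token -- the base URL and token are placeholders, not part of this commit:

# Sketch only: base URL, port, and token are assumptions, not part of this commit.
import httpx

BASE_URL = "http://localhost:8000/api/monitoring"
HEADERS = {"Authorization": "Bearer <placeholder-token>"}

def poll_monitoring() -> None:
    with httpx.Client(base_url=BASE_URL, headers=HEADERS, timeout=10.0) as client:
        # Aggregate view: LLM availability plus open circuit breakers
        system = client.get("/status/system").json()
        print("system:", system["data"]["system_status"])

        # Per-user token-bucket usage, served by get_rate_limit_metrics
        rl = client.get("/metrics/rate-limit").json()
        print(f"credits: {rl['data']['remaining']}/{rl['data']['capacity']}")

if __name__ == "__main__":
    poll_monitoring()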
app/core/rate_limiter.py ADDED
@@ -0,0 +1,289 @@
+ # ============================================================
+ # app/core/rate_limiter.py - Advanced Token Bucket Rate Limiting
+ # ============================================================
+
+ import json
+ import logging
+ import time
+ from typing import Any, Dict, Optional, Tuple
+ from datetime import datetime, timedelta
+ from app.ai.config import redis_client
+ from app.core.error_handling import LojizError
+
+ logger = logging.getLogger(__name__)
+
+ # ============================================================
+ # Rate Limit Configuration
+ # ============================================================
+
+ class RateLimitConfig:
+     """Rate limiting configuration by operation type."""
+
+     # Operation costs (in "credits")
+     OPERATION_COSTS = {
+         "chat": 1,          # Basic chat
+         "search": 2,        # Vector search (expensive)
+         "list": 3,          # Create listing (ML validation)
+         "publish": 5,       # Publish (database + indexing)
+         "edit": 2,          # Edit listing
+         "upload_image": 4,  # Image upload (Cloudflare)
+     }
+
+     # Rate limits (credits per time window)
+     LIMITS = {
+         "user": {
+             "credits": 100,    # 100 credits per minute
+             "window_seconds": 60,
+         },
+         "ip": {
+             "credits": 500,    # 500 credits per minute (more permissive)
+             "window_seconds": 60,
+         },
+         "global": {
+             "credits": 10000,  # 10k credits per minute (system-wide)
+             "window_seconds": 60,
+         }
+     }
+
+     # Burst allowance (temporary spike tolerance)
+     BURST_MULTIPLIER = 1.5  # Allow 50% burst above limit
+
+     # Cleanup settings
+     CLEANUP_INTERVAL = 3600  # Clean old buckets every hour
+     MAX_BUCKET_AGE = 86400   # Keep buckets for 24 hours max
+
+ # ============================================================
+ # Token Bucket Implementation
+ # ============================================================
+
+ class TokenBucket:
+     """Token bucket for rate limiting."""
+
+     def __init__(self, capacity: int, refill_rate: float):
+         """
+         Args:
+             capacity: Max tokens in bucket
+             refill_rate: Tokens per second
+         """
+         self.capacity = capacity
+         self.refill_rate = refill_rate
+         self.tokens = capacity
+         self.last_refill = time.time()
+
+     def refill(self) -> None:
+         """Refill tokens based on time elapsed."""
+         now = time.time()
+         elapsed = now - self.last_refill
+
+         new_tokens = elapsed * self.refill_rate
+         self.tokens = min(self.capacity, self.tokens + new_tokens)
+         self.last_refill = now
+
+     def consume(self, tokens: int) -> bool:
+         """Try to consume tokens."""
+         self.refill()
+
+         if self.tokens >= tokens:
+             self.tokens -= tokens
+             return True
+
+         return False
+
+     def get_available(self) -> int:
+         """Get available tokens."""
+         self.refill()
+         return int(self.tokens)
+
+ # ============================================================
+ # Advanced Rate Limiter
+ # ============================================================
+
+ class AdvancedRateLimiter:
+     """Token bucket rate limiter with multiple scopes."""
+
+     def __init__(self):
+         self.buckets: Dict[str, TokenBucket] = {}
+         self.last_cleanup = time.time()
+
+     async def is_allowed(
+         self,
+         user_id: str,
+         operation: str,
+         ip_address: Optional[str] = None,
+     ) -> Tuple[bool, Dict[str, Any]]:
+         """
+         Check if an operation is allowed for a user.
+
+         Returns:
+             (is_allowed, rate_limit_info)
+         """
+         operation_cost = RateLimitConfig.OPERATION_COSTS.get(operation, 1)
+
+         # Check all scopes
+         user_check = await self._check_scope(
+             f"user:{user_id}",
+             operation_cost,
+             RateLimitConfig.LIMITS["user"]
+         )
+
+         ip_check = await self._check_scope(
+             f"ip:{ip_address}",
+             operation_cost,
+             RateLimitConfig.LIMITS["ip"]
+         ) if ip_address else (True, {})
+
+         global_check = await self._check_scope(
+             "global",
+             operation_cost,
+             RateLimitConfig.LIMITS["global"]
+         )
+
+         # All must pass
+         is_allowed = user_check[0] and ip_check[0] and global_check[0]
+
+         info = {
+             "allowed": is_allowed,
+             "operation": operation,
+             "cost": operation_cost,
+             "user": user_check[1],
+             "ip": ip_check[1] if ip_address else None,
+             "global": global_check[1],
+             "timestamp": datetime.utcnow().isoformat(),
+         }
+
+         if not is_allowed:
+             logger.warning(
+                 "⚠️ Rate limit exceeded",
+                 extra={
+                     "user_id": user_id,
+                     "operation": operation,
+                     "ip": ip_address,
+                 }
+             )
+
+         return is_allowed, info
+
+     async def _check_scope(
+         self,
+         scope_key: str,
+         cost: int,
+         config: Dict,
+     ) -> Tuple[bool, Dict]:
+         """Check a single scope (user/ip/global)."""
+         try:
+             # Get bucket from Redis
+             bucket_data = await redis_client.get(f"rate_limit:{scope_key}")
+
+             if bucket_data:
+                 # Deserialize
+                 data = json.loads(bucket_data)
+                 tokens = data["tokens"]
+                 last_refill = data["last_refill"]
+             else:
+                 # New bucket
+                 tokens = config["credits"]
+                 last_refill = time.time()
+
+             # Refill based on time elapsed
+             now = time.time()
+             elapsed = now - last_refill
+             refill_rate = config["credits"] / config["window_seconds"]
+             new_tokens = elapsed * refill_rate
+             tokens = min(config["credits"], tokens + new_tokens)
+
+             # Check if allowed
+             allowed = tokens >= cost
+
+             if allowed:
+                 tokens -= cost
+                 logger.debug(f"βœ… Rate limit OK: {scope_key} ({int(tokens)} tokens left)")
+             else:
+                 logger.warning(f"🚫 Rate limit exceeded: {scope_key}")
+
+             # Save back to Redis
+             await redis_client.setex(
+                 f"rate_limit:{scope_key}",
+                 config["window_seconds"] * 2,  # TTL
+                 json.dumps({
+                     "tokens": tokens,
+                     "last_refill": now,
+                     "capacity": config["credits"],
+                 })
+             )
+
+             return allowed, {
+                 "remaining": int(tokens),
+                 "capacity": config["credits"],
+                 "reset_in": config["window_seconds"],
+             }
+
+         except Exception as e:
+             logger.error(f"❌ Rate limit check error: {e}")
+             # Fail open (allow) on error
+             return True, {"error": "rate_limit_check_failed"}
+
+     async def get_usage_stats(self, user_id: str) -> Dict:
+         """Get current usage stats for a user."""
+         bucket_data = await redis_client.get(f"rate_limit:user:{user_id}")
+
+         if not bucket_data:
+             return {
+                 "user_id": user_id,
+                 "remaining": RateLimitConfig.LIMITS["user"]["credits"],
+                 "capacity": RateLimitConfig.LIMITS["user"]["credits"],
+                 "reset_in": RateLimitConfig.LIMITS["user"]["window_seconds"],
+             }
+
+         data = json.loads(bucket_data)
+
+         return {
+             "user_id": user_id,
+             "remaining": int(data["tokens"]),
+             "capacity": data["capacity"],
+             "reset_in": RateLimitConfig.LIMITS["user"]["window_seconds"],
+         }
+
+     async def reset_user_limits(self, user_id: str) -> bool:
+         """Reset rate limits for a user (admin only)."""
+         try:
+             await redis_client.delete(f"rate_limit:user:{user_id}")
+             logger.info(f"βœ… Rate limits reset for user: {user_id}")
+             return True
+         except Exception as e:
+             logger.error(f"❌ Failed to reset limits: {e}")
+             return False
+
+ # ============================================================
+ # Global Instance
+ # ============================================================
+
+ _rate_limiter = None
+
+ def get_rate_limiter() -> AdvancedRateLimiter:
+     """Get or create the rate limiter instance."""
+     global _rate_limiter
+     if _rate_limiter is None:
+         _rate_limiter = AdvancedRateLimiter()
+     return _rate_limiter
+
+ # ============================================================
+ # Exceptions
+ # ============================================================
+
+ class RateLimitExceeded(LojizError):
+     """Rate limit exceeded error."""
+
+     def __init__(self, retry_after: int = 60):
+         self.retry_after = retry_after
+         super().__init__(
+             f"Rate limit exceeded. Try again in {retry_after}s",
+             error_code="RATE_LIMIT_EXCEEDED",
+             status_code=429,
+             recoverable=True,
+             context={"retry_after": retry_after}
+         )
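
To make the credit arithmetic concrete: with the default user budget of 100 credits per 60-second window, one user can do at most 100 chats, 50 searches, or 20 publishes per minute, with credits refilling continuously at roughly 1.67 per second. Below is a sketch of how an endpoint might gate work on this limiter; the /example route and its handler are hypothetical, while get_rate_limiter, is_allowed, and RateLimitExceeded are the APIs defined above, and it assumes an exception handler for LojizError is registered so RateLimitExceeded maps to HTTP 429:

# Hypothetical endpoint showing the intended call pattern (not part of this commit).
from fastapi import APIRouter, Request

from app.core.rate_limiter import RateLimitExceeded, get_rate_limiter

router = APIRouter()

@router.post("/example")
async def example_endpoint(request: Request, user_id: str):
    limiter = get_rate_limiter()
    allowed, info = await limiter.is_allowed(
        user_id=user_id,
        operation="chat",  # costs 1 credit per RateLimitConfig.OPERATION_COSTS
        ip_address=request.client.host if request.client else None,
    )
    if not allowed:
        # reset_in comes from the per-user scope's rate_limit_info
        raise RateLimitExceeded(retry_after=info["user"].get("reset_in", 60))
    return {"success": True, "rate_limit": info}

Note the design choice in _check_scope: it deliberately fails open on Redis errors, so a rate-limiter outage degrades to no limiting rather than blocking all traffic.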
main.py CHANGED
@@ -6,6 +6,7 @@ from fastapi.middleware.cors import CORSMiddleware
  from fastapi.responses import JSONResponse
  from fastapi.exceptions import RequestValidationError
  from contextlib import asynccontextmanager
+ from app.api.endpoints.monitoring import router as monitoring_router
  import logging
  import os
  import asyncio
@@ -306,11 +307,17 @@ async def observability_status():
          "llm_router": llm_status,
          "token_usage": "See traces in observability backend",
      }
+
+
+ # Include monitoring endpoints
+ app.include_router(monitoring_router, prefix="/api/monitoring", tags=["Monitoring"])
+

  # ====================================================================
  # Health
  # ====================================================================
  @app.get("/health", tags=["Health"])
+
  async def health_check():
      """Health check endpoint with ML & LLM status"""
      try:
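
A quick smoke check that the wiring above took effect -- a sketch, assuming main exposes app, that fastapi's TestClient is available in the test environment, and that app startup does not require live backing services:

# Sketch: verify the monitoring router is mounted under /api/monitoring.
# A 404 would mean include_router was not applied; an auth error (401/403)
# from get_current_user still proves the route exists.
from fastapi.testclient import TestClient

from main import app  # assumes main.py is importable in the test environment

def test_monitoring_router_mounted() -> None:
    client = TestClient(app)
    response = client.get("/api/monitoring/metrics/llm")
    assert response.status_code != 404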