destinyebuka committed on
Commit ff21ae5 · 1 Parent(s): 9f3c354
Files changed (46)
  1. app/__pycache__/__init__.cpython-313.pyc +0 -0
  2. app/ai/nodes/intent_node.py +120 -56
  3. app/ai/prompts/system_prompt.txt +88 -137
  4. app/ai/routes/chat.py +64 -24
  5. app/ai/services/dynamic_role_manager.py +299 -0
  6. app/ml/__pycache__/__init__.cpython-313.pyc +0 -0
  7. app/ml/models/combined_training_data.csv +4 -0
  8. app/ml/models/combined_training_data.jsonl +3 -0
  9. app/ml/models/combined_training_data.parquet +3 -0
  10. app/ml/models/dataset_info.json +31 -0
  11. app/ml/models/field_models.pkl +3 -0
  12. app/ml/models/ml_listing_extractor.py +60 -59
  13. app/ml/models/user_role_context_handler.py +607 -0
  14. app/ml/trainning/__init__.py +1 -0
  15. app/ml/trainning/__pycache__/__init__.cpython-313.pyc +0 -0
  16. app/ml/trainning/__pycache__/hf_dataset_downloader.cpython-313.pyc +0 -0
  17. app/ml/trainning/__pycache__/train_enhanced_model.cpython-313.pyc +0 -0
  18. app/ml/trainning/hf_dataset_downloader.py +179 -0
  19. app/ml/trainning/train_enhanced_model.py +123 -0
  20. models/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/added_tokens.json +0 -0
  21. models/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/chat_template.jinja +0 -0
  22. models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db +3 -0
  23. models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/58d4a9a45664eb9e12de9549c548c09b6134c17f +3 -0
  24. models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/59d594003bf59880a884c574bf88ef7555bb0202 +3 -0
  25. models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/72b987fd805cfa2b58c4c8c952b274a11bfd5a00 +3 -0
  26. models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/952a9b81c0bfd99800fabf352f69c7ccd46c5e43 +3 -0
  27. models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/c79f2b6a0cea6f4b564fed1938984bace9d30ff0 +3 -0
  28. models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/cb202bfe2e3c98645018a6d12f182a434c9d3e02 +3 -0
  29. models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/d1514c3162bbe87b343f565fadc62e6c06f04f03 +3 -0
  30. models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5 +3 -0
  31. models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fb140275c155a9c7c5a3b3e0e77a9e839594a938 +3 -0
  32. models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fd1b291129c607e5d49799f87cb219b27f98acdf +3 -0
  33. models/models--sentence-transformers--all-MiniLM-L6-v2/refs/main +3 -0
  34. models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json +1 -0
  35. models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/README.md +1 -0
  36. models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config.json +1 -0
  37. models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config_sentence_transformers.json +1 -0
  38. models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/model.safetensors +1 -0
  39. models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json +1 -0
  40. models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/sentence_bert_config.json +1 -0
  41. models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/special_tokens_map.json +1 -0
  42. models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer.json +1 -0
  43. models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer_config.json +1 -0
  44. models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/vocab.txt +1 -0
  45. scripts/download_training_data.py +5 -0
  46. scripts/train_models.py +5 -0
app/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (278 Bytes).
 
app/ai/nodes/intent_node.py CHANGED
@@ -1,4 +1,4 @@
1
- # app/ai/nodes/intent_node.py – FINAL: Multi-LLM routing + context mgmt + observability
2
  import json
3
  import re
4
  from typing import Dict, List
@@ -15,6 +15,7 @@ from app.core.error_handling import (
15
  )
16
  from app.core.observability import get_token_tracker
17
  from app.ml.models.ml_listing_extractor import get_ml_extractor
 
18
  from app.ai.nodes.draft_node import (
19
  _generate_title,
20
  _generate_description,
@@ -27,6 +28,22 @@ MAX_TOKENS = 600
27
  TEMP = 0
28
 
29
  ml_extractor = get_ml_extractor()
 
 
 
 
 
30
 
31
  # ============================================================
32
  # Helpers
@@ -38,7 +55,7 @@ def _load_system() -> str:
38
  with open("app/ai/prompts/system_prompt.txt", encoding="utf-8") as f:
39
  return f.read()
40
  except FileNotFoundError:
41
- logger.error("System prompt file not found")
42
  return "You are Aida, a helpful AI assistant."
43
 
44
  SYSTEM_PROMPT = _load_system()
@@ -160,90 +177,105 @@ def _build_draft_preview(data: dict) -> dict:
160
  "field_confidences": data.get("field_validations", {}),
161
  }
162
 
 
 
 
 
163
  # ============================================================
164
- # Intent Node
165
  # ============================================================
166
 
167
  @async_retry(strategy=RetryStrategy.MODERATE, operation_name="intent_node")
168
  async def intent_node(state: Dict) -> Dict:
169
- """
170
- LangGraph node: Extract and route user intent
171
-
172
- Features:
173
- - Command detection when preview active
174
- - Smart LLM routing with auto-fallback
175
- - Context window management
176
- - ML validation and inference
177
- - Full error handling and observability
178
- """
179
 
180
  current_msg = _get_current_message(state).lower()
181
  status = state.get("status")
 
182
 
183
  with trace_operation(
184
  "intent_node",
185
  {
186
  "status": status,
 
187
  "has_draft": state.get("draft_preview") is not None,
188
  }
189
  ):
190
- # ===== CRITICAL: Handle commands FIRST when preview is active =====
191
  if status in {"preview_shown", "waiting_for_images"} and state.get("draft_preview"):
192
- logger.info(f"🎯 COMMAND DETECTION MODE: status={status}")
193
 
194
- # PUBLISH command
195
  if any(w in current_msg for w in {"publish", "go live", "confirm", "yes", "ok"}):
196
- logger.info("📤 COMMAND: publish")
197
  state["intent"] = "publish"
198
  state["ai_reply"] = ""
199
  return state
200
 
201
- # EDIT command
202
  if "edit" in current_msg or "change" in current_msg or "update" in current_msg:
203
  field = current_msg.replace("edit", "").replace("change", "").replace("update", "").strip()
204
- logger.info(f"✏️ COMMAND: edit field='{field}'")
205
  state["status"] = "collecting"
206
  state["missing_fields"] = [field] if field else ["location"]
207
  state["ai_reply"] = f"Sure! What would you like to change for **{field}**?"
208
  return state
209
 
210
- # DISCARD command
211
  if any(w in current_msg for w in {"discard", "cancel", "delete", "no"}):
212
- logger.info("🗑️ COMMAND: discard")
213
  state["status"] = None
214
  state["draft_preview"] = None
215
  state["intent"] = None
216
  state["ai_reply"] = "Draft discarded. Let me know if you want to list another property!"
217
  return state
218
 
219
- # CASUAL CHAT
220
- logger.info("💬 COMMAND: casual chat")
221
  state["ai_reply"] = "Say **publish** to list, **edit** to change, or **discard** to start over."
222
  return state
223
 
224
- # ===== End command detection =====
225
-
226
  if state.get("status") in ["published", "error"]:
227
- logger.info(f"Skipping intent_node, status={state.get('status')}")
228
  return state
229
 
230
- user_role = state["user_role"]
231
  user_id = state.get("user_id")
232
  human_msg = state["messages"][-1]["content"]
233
 
234
- # ===== LLM CALL WITH SMART ROUTING =====
235
  with trace_operation("llm_call_with_routing"):
236
  try:
237
- # Manage context
238
  context_mgr = get_context_manager()
239
  messages = await context_mgr.manage_context([
240
- {"role": "system", "content": SYSTEM_PROMPT.replace("{user_role}", user_role)},
241
  {"role": "user", "content": human_msg},
242
  ])
243
 
244
- logger.info(f"🤖 Calling LLM with smart routing")
245
 
246
- # Call LLM with smart routing
247
  text, model_used, usage = await call_llm_smart(
248
  messages,
249
  intent=state.get("intent"),
@@ -251,7 +283,6 @@ async def intent_node(state: Dict) -> Dict:
251
  max_tokens=MAX_TOKENS,
252
  )
253
 
254
- # Track tokens
255
  tracker = get_token_tracker()
256
  tracker.record_tokens(
257
  model_used,
@@ -260,7 +291,7 @@ async def intent_node(state: Dict) -> Dict:
260
  )
261
 
262
  logger.info(
263
- f"LLM response from {model_used}",
264
  extra={
265
  "tokens": usage.get("total_tokens", 0),
266
  "duration_ms": usage.get("duration_ms", 0),
@@ -270,26 +301,50 @@ async def intent_node(state: Dict) -> Dict:
270
  raw = text
271
 
272
  except LLMError as e:
273
- logger.error(f"LLM error: {e.message}")
274
  state["ai_reply"] = "Sorry, I'm having trouble. Please try again."
275
  state["status"] = "error"
276
  return state
277
 
278
  except Exception as e:
279
- logger.error(f"Unexpected LLM error: {e}", exc_info=True)
280
  raise
281
 
282
- # ===== Parse JSON response =====
283
  try:
284
  cleaned = _clean_json(raw)
285
  data = json.loads(cleaned)
286
  except json.JSONDecodeError as e:
287
- logger.error(f"Invalid JSON response: {raw[:100]}")
288
  data = {"allowed": False, "ai_reply": "Sorry, I didn't understand that. Could you rephrase?"}
289
 
290
- # ===== Handle LISTING INTENT =====
291
  if data.get("intent") == "list":
292
  data["allowed"] = True
 
 
 
 
 
 
293
  missing = _get_missing_fields(data)
294
 
295
  if missing:
@@ -301,56 +356,63 @@ async def intent_node(state: Dict) -> Dict:
301
  data["missing_fields"] = []
302
  data["draft_preview"] = _build_draft_preview(data)
303
  data["ai_reply"] = "Perfect! Let me prepare your listing draft..."
304
- logger.info("All required fields complete")
305
 
306
- # ===== Handle SEARCH INTENT =====
307
  if data.get("intent") == "search":
308
  data["allowed"] = True
 
 
 
 
 
309
 
310
- # ===== Normalize values =====
311
  location = _normalize_locations(data.get("location"))
312
  amenities = _normalize_amenities(data.get("amenities", []))
313
  price_type = _normalize_price_type(data.get("price_type"))
314
  listing_type = _normalize_listing_type(data.get("listing_type"))
315
 
316
- # ===== ML INFERENCE & VALIDATION =====
317
  if data.get("intent") == "list":
318
  with trace_operation("ml_processing"):
319
- # Extract location
320
  if data.get("location"):
321
  try:
322
  city, loc_info = await ml_extractor.extract_location_from_address(data["location"])
323
  if city:
324
  data["location"] = city
325
  data["location_details"] = loc_info
326
- logger.info(f"Location extracted: {data['location']}")
327
  except Exception as e:
328
- logger.warning(f"⚠️ Location extraction failed: {e}")
329
 
330
- # Infer listing type
331
  try:
332
- lt, conf = ml_extractor.infer_listing_type(
333
- data, user_role=user_role, user_message=human_msg
334
  )
335
  if lt:
336
  data["listing_type"] = lt
337
  data["listing_confidence"] = conf
338
- logger.info(f"Listing type inferred: {lt}")
339
  except Exception as e:
340
- logger.warning(f"⚠️ Listing type inference failed: {e}")
341
 
342
- # Infer currency
343
  try:
344
  currency, city, conf = await ml_extractor.infer_currency(data)
345
  if currency:
346
  data["currency"] = currency
347
  data["currency_confidence"] = conf
348
- logger.info(f"Currency inferred: {currency}")
349
  except Exception as e:
350
- logger.warning(f"⚠️ Currency inference failed: {e}")
351
  data["currency"] = data.get("currency", "XOF")
352
 
353
- # ===== Update state =====
354
  state.update(
355
  allowed=data.get("allowed", False),
356
  status=data.get("status"),
@@ -372,9 +434,11 @@ async def intent_node(state: Dict) -> Dict:
372
  )
373
 
374
  logger.info(
375
- f"👤 Intent node processed",
376
  extra={
377
  "intent": data.get("intent"),
 
 
378
  "status": state.get("status"),
379
  }
380
  )
 
1
+ # app/ai/nodes/intent_node.py - WITH DYNAMIC ROLE SYSTEM
2
  import json
3
  import re
4
  from typing import Dict, List
 
15
  )
16
  from app.core.observability import get_token_tracker
17
  from app.ml.models.ml_listing_extractor import get_ml_extractor
18
+ from app.ai.services.dynamic_role_manager import get_dynamic_role_manager
19
  from app.ai.nodes.draft_node import (
20
  _generate_title,
21
  _generate_description,
 
28
  TEMP = 0
29
 
30
  ml_extractor = get_ml_extractor()
31
+ role_manager = get_dynamic_role_manager()
32
+
33
+ # Import Role-Based Inference Engine
34
+ try:
35
+ from app.ml.models.user_role_context_handler import (
36
+ RoleBasedInferenceEngine,
37
+ UserRoleDetector,
38
+ )
39
+ role_engine = RoleBasedInferenceEngine()
40
+ role_detector = UserRoleDetector()
41
+ logger.info("Role-based inference engine loaded")
42
+ except Exception as e:
43
+ logger.warning(f"Role-based inference not available: {e}")
44
+ role_engine = None
45
+ role_detector = None
46
+
47
 
48
  # ============================================================
49
  # Helpers
 
55
  with open("app/ai/prompts/system_prompt.txt", encoding="utf-8") as f:
56
  return f.read()
57
  except FileNotFoundError:
58
+ logger.error("System prompt file not found")
59
  return "You are Aida, a helpful AI assistant."
60
 
61
  SYSTEM_PROMPT = _load_system()
 
177
  "field_confidences": data.get("field_validations", {}),
178
  }
179
 
180
+ def infer_listing_type(state: Dict, user_role: str = None, user_message: str = None) -> tuple:
181
+ """Intelligently infer listing_type"""
182
+
183
+ explicit_type = state.get("listing_type")
184
+ price_type = state.get("price_type")
185
+
186
+ # 1. If explicitly stated, use it
187
+ if explicit_type and explicit_type in ["rent", "short-stay", "sale", "roommate"]:
188
+ logger.info(f"Listing type explicit: {explicit_type}")
189
+ return explicit_type, 0.99
190
+
191
+ # 2. Infer from price_type
192
+ if price_type:
193
+ price_type_lower = price_type.lower().strip()
194
+
195
+ # Short-stay indicators
196
+ if price_type_lower in ["nightly", "daily", "weekly", "night", "day", "week"]:
197
+ logger.info(f"Inferred short-stay from price_type: {price_type}")
198
+ return "short-stay", 0.95
199
+
200
+ # Monthly/Yearly = RENT
201
+ elif price_type_lower in ["monthly", "yearly", "month", "year", "mth", "yr"]:
202
+ logger.info(f"Inferred rent from price_type: {price_type}")
203
+ return "rent", 0.95
204
+
205
+ # 3. Default to rent
206
+ logger.warning("Could not infer listing_type, defaulting to rent")
207
+ return "rent", 0.5
208
+
209
+
210
  # ============================================================
211
+ # Intent Node with Dynamic Role
212
  # ============================================================
213
 
214
  @async_retry(strategy=RetryStrategy.MODERATE, operation_name="intent_node")
215
  async def intent_node(state: Dict) -> Dict:
216
+ """LangGraph node: Extract intent with DYNAMIC ROLE ASSIGNMENT"""
 
 
 
 
217
 
218
  current_msg = _get_current_message(state).lower()
219
  status = state.get("status")
220
+ base_user_role = state.get("user_role", "renter") # Original signup role
221
 
222
  with trace_operation(
223
  "intent_node",
224
  {
225
  "status": status,
226
+ "base_role": base_user_role,
227
  "has_draft": state.get("draft_preview") is not None,
228
  }
229
  ):
230
+ # Handle commands FIRST when preview is active
231
  if status in {"preview_shown", "waiting_for_images"} and state.get("draft_preview"):
232
+ logger.info(f"COMMAND DETECTION MODE: status={status}")
233
 
 
234
  if any(w in current_msg for w in {"publish", "go live", "confirm", "yes", "ok"}):
235
+ logger.info("COMMAND: publish")
236
  state["intent"] = "publish"
237
  state["ai_reply"] = ""
238
  return state
239
 
 
240
  if "edit" in current_msg or "change" in current_msg or "update" in current_msg:
241
  field = current_msg.replace("edit", "").replace("change", "").replace("update", "").strip()
242
+ logger.info(f"COMMAND: edit field='{field}'")
243
  state["status"] = "collecting"
244
  state["missing_fields"] = [field] if field else ["location"]
245
  state["ai_reply"] = f"Sure! What would you like to change for **{field}**?"
246
  return state
247
 
 
248
  if any(w in current_msg for w in {"discard", "cancel", "delete", "no"}):
249
+ logger.info("COMMAND: discard")
250
  state["status"] = None
251
  state["draft_preview"] = None
252
  state["intent"] = None
253
  state["ai_reply"] = "Draft discarded. Let me know if you want to list another property!"
254
  return state
255
 
256
+ logger.info("COMMAND: casual chat")
 
257
  state["ai_reply"] = "Say **publish** to list, **edit** to change, or **discard** to start over."
258
  return state
259
 
260
+ # Skip if status is published or error
 
261
  if state.get("status") in ["published", "error"]:
262
+ logger.info(f"Skipping intent_node, status={state.get('status')}")
263
  return state
264
 
 
265
  user_id = state.get("user_id")
266
  human_msg = state["messages"][-1]["content"]
267
 
268
+ # LLM CALL
269
  with trace_operation("llm_call_with_routing"):
270
  try:
 
271
  context_mgr = get_context_manager()
272
  messages = await context_mgr.manage_context([
273
+ {"role": "system", "content": SYSTEM_PROMPT.replace("{user_role}", base_user_role)},
274
  {"role": "user", "content": human_msg},
275
  ])
276
 
277
+ logger.info("Calling LLM with smart routing")
278
 
 
279
  text, model_used, usage = await call_llm_smart(
280
  messages,
281
  intent=state.get("intent"),
 
283
  max_tokens=MAX_TOKENS,
284
  )
285
 
 
286
  tracker = get_token_tracker()
287
  tracker.record_tokens(
288
  model_used,
 
291
  )
292
 
293
  logger.info(
294
+ f"LLM response from {model_used}",
295
  extra={
296
  "tokens": usage.get("total_tokens", 0),
297
  "duration_ms": usage.get("duration_ms", 0),
 
301
  raw = text
302
 
303
  except LLMError as e:
304
+ logger.error(f"LLM error: {e.message}")
305
  state["ai_reply"] = "Sorry, I'm having trouble. Please try again."
306
  state["status"] = "error"
307
  return state
308
 
309
  except Exception as e:
310
+ logger.error(f"Unexpected LLM error: {e}", exc_info=True)
311
  raise
312
 
313
+ # Parse JSON
314
  try:
315
  cleaned = _clean_json(raw)
316
  data = json.loads(cleaned)
317
  except json.JSONDecodeError as e:
318
+ logger.error(f"Invalid JSON response: {raw[:100]}")
319
  data = {"allowed": False, "ai_reply": "Sorry, I didn't understand that. Could you rephrase?"}
320
 
321
+ # Handle LISTING INTENT with DYNAMIC ROLE
322
  if data.get("intent") == "list":
323
  data["allowed"] = True
324
+ listing_type = _normalize_listing_type(data.get("listing_type"))
325
+
326
+ # NEW: Get dynamic role based on listing type
327
+ dynamic_role, role_desc, role_conf = role_manager.get_dynamic_role_for_listing(
328
+ base_user_role,
329
+ listing_type
330
+ )
331
+
332
+ if dynamic_role is None:
333
+ # User not allowed to create this type of listing
334
+ logger.warning(f"User {base_user_role} cannot create {listing_type} listing")
335
+ data["allowed"] = False
336
+ data["ai_reply"] = role_desc
337
+ state.update(
338
+ allowed=False,
339
+ ai_reply=data["ai_reply"],
340
+ status="error"
341
+ )
342
+ return state
343
+
344
+ # Store dynamic role in state
345
+ state["dynamic_role"] = dynamic_role
346
+ logger.info(f"Dynamic role assigned: {dynamic_role} ({role_desc})")
347
+
348
  missing = _get_missing_fields(data)
349
 
350
  if missing:
 
356
  data["missing_fields"] = []
357
  data["draft_preview"] = _build_draft_preview(data)
358
  data["ai_reply"] = "Perfect! Let me prepare your listing draft..."
359
+ logger.info("All required fields complete")
360
 
361
+ # Handle SEARCH INTENT with DYNAMIC ROLE
362
  if data.get("intent") == "search":
363
  data["allowed"] = True
364
+
365
+ # NEW: Get dynamic role for search
366
+ dynamic_role, role_desc, role_conf = role_manager.get_dynamic_role_for_search(
367
+ base_user_role,
368
+ data.get("listing_type")
369
+ )
370
+
371
+ # Store dynamic role
372
+ state["dynamic_role"] = dynamic_role
373
+ logger.info(f"Dynamic role assigned: {dynamic_role} ({role_desc})")
374
 
375
+ # Normalize values
376
  location = _normalize_locations(data.get("location"))
377
  amenities = _normalize_amenities(data.get("amenities", []))
378
  price_type = _normalize_price_type(data.get("price_type"))
379
  listing_type = _normalize_listing_type(data.get("listing_type"))
380
 
381
+ # ML INFERENCE
382
  if data.get("intent") == "list":
383
  with trace_operation("ml_processing"):
 
384
  if data.get("location"):
385
  try:
386
  city, loc_info = await ml_extractor.extract_location_from_address(data["location"])
387
  if city:
388
  data["location"] = city
389
  data["location_details"] = loc_info
390
+ logger.info(f"Location extracted: {data['location']}")
391
  except Exception as e:
392
+ logger.warning(f"Location extraction failed: {e}")
393
 
 
394
  try:
395
+ lt, conf = infer_listing_type(
396
+ data, user_role=state.get("dynamic_role"), user_message=human_msg
397
  )
398
  if lt:
399
  data["listing_type"] = lt
400
  data["listing_confidence"] = conf
401
+ logger.info(f"Listing type inferred: {lt}")
402
  except Exception as e:
403
+ logger.warning(f"Listing type inference failed: {e}")
404
 
 
405
  try:
406
  currency, city, conf = await ml_extractor.infer_currency(data)
407
  if currency:
408
  data["currency"] = currency
409
  data["currency_confidence"] = conf
410
+ logger.info(f"Currency inferred: {currency}")
411
  except Exception as e:
412
+ logger.warning(f"Currency inference failed: {e}")
413
  data["currency"] = data.get("currency", "XOF")
414
 
415
+ # Update state
416
  state.update(
417
  allowed=data.get("allowed", False),
418
  status=data.get("status"),
 
434
  )
435
 
436
  logger.info(
437
+ f"Intent node processed",
438
  extra={
439
  "intent": data.get("intent"),
440
+ "base_role": base_user_role,
441
+ "dynamic_role": state.get("dynamic_role"),
442
  "status": state.get("status"),
443
  }
444
  )
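
For orientation, here is a minimal sketch (not part of the commit) of how the dynamic-role gate added to `intent_node` behaves. It assumes the `get_dynamic_role_manager()` import shown above and the `(dynamic_role, description, confidence)` tuple returned by `get_dynamic_role_for_listing` in `app/ai/services/dynamic_role_manager.py` later in this diff; `gate_listing` is a hypothetical helper used only for illustration.

```python
# Illustrative sketch of the role gate applied in intent_node.py.
from app.ai.services.dynamic_role_manager import get_dynamic_role_manager

role_manager = get_dynamic_role_manager()

def gate_listing(base_role: str, listing_type: str) -> dict:
    dynamic_role, role_desc, role_conf = role_manager.get_dynamic_role_for_listing(
        base_role, listing_type
    )
    if dynamic_role is None:
        # Disallowed combination: intent_node blocks the listing and surfaces
        # the manager's description as the ai_reply, with status "error".
        return {"allowed": False, "ai_reply": role_desc, "status": "error"}
    # Allowed: the dynamic role is stored in state and drives later inference.
    return {"allowed": True, "dynamic_role": dynamic_role, "role_confidence": role_conf}

# Per the manager's mapping: gate_listing("landlord", "short-stay") yields the
# "host" role, while gate_listing("renter", "rent") is blocked with an
# explanatory ai_reply.
```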
app/ai/prompts/system_prompt.txt CHANGED
@@ -1,76 +1,89 @@
1
  You are Aida, Lojiz's friendly real-estate AI assistant.
2
- User role: {user_role} (landlord | renter)
3
 
4
  ------------------------------------------------------
5
- BRAND IDENTITY & RESPONSES
6
  ------------------------------------------------------
7
 
8
- **WHO YOU ARE:**
9
  - You are AIDA, Lojiz's proprietary real-estate AI assistant
10
  - Do NOT mention DeepSeek, GPT, or any other LLM names
11
  - You are a standalone AI developed specifically for Lojiz
12
  - Never refer to yourself as based on other AI models
13
 
14
- **IF ASKED "WHO ARE YOU?"**
15
- "I'm Aida, Lojiz's AI real-estate assistant. I help landlords list properties and renters find their perfect home!"
16
 
17
- **IF ASKED "WHO DESIGNED YOU?" OR "WHO CREATED YOU?"**
18
  "I was designed and developed by the Lojiz team. Our talented team includes:
19
- - **Emeka Destiny Ebuka** - Lead AI & ML Engineer/Developer - designed my intelligence and learning capabilities
20
- - **Uche Samuel Chukwuemeka** - Product Designer - shaped my user experience and personality
21
-
22
  Together, they created me to make real-estate transactions easier and more accessible for everyone."
23
 
24
- **IF ASKED "WHO BUILT YOU?" OR "WHAT COMPANY MADE YOU?"**
25
- "I'm built by Lojiz, a real-estate platform designed to connect landlords and renters seamlessly across Africa. The Lojiz team created me with passion and expertise in AI and real-estate technology."
26
-
27
- **IF ASKED "ARE YOU AN LLM?" OR "WHAT MODEL ARE YOU?"**
28
- "I'm Aida, Lojiz's specialized AI assistant. I'm not just a general-purpose AI - I'm specifically built to understand real-estate, help with property listings, and guide users through the Lojiz platform. That's what makes me different!"
29
 
30
- **NEVER SAY:**
31
- - "I'm based on DeepSeek"
32
- - "I'm powered by GPT"
33
- - "I'm an LLM (Language Model)"
34
- - ❌ "I was trained on general data"
35
- - ✅ Say "I'm Aida, Lojiz's AI assistant" instead
36
 
37
  ------------------------------------------------------
38
- PERSONALITY & GRAMMAR TOLERANCE
39
  ------------------------------------------------------
40
- - Speak like a human, short and warm.
41
- - IGNORE typos, grammar mistakes, and spelling errors. Understand intent anyway.
42
- - If the user greets or talks off-topic, reply casually and ask: "Would you like to list a property or search for one?"
43
- - Only switch to JSON output when property details are mentioned OR when user clearly wants to SEARCH / SEE / FIND houses/flats/rooms/apartments.
44
- - Be proud of being Aida - make it clear you're a specialized real-estate AI
45
 
46
  ------------------------------------------------------
47
- CRITICAL: PRESERVE USER CORRECTIONS
48
  ------------------------------------------------------
49
- - ⚠️ IMPORTANT: If a user corrects ANY field (location, bedrooms, price, etc.), PRESERVE that correction
50
- - Do NOT regenerate or revert corrections when showing previews
51
- - Example: If user says "actually it's in Calavi not Cotonou", update state AND keep it that way
52
- - When showing draft preview, always use the LATEST corrected values from state
53
- - If a user requests changes (amenity icons, requirements, etc.), apply them WITHOUT reverting previous corrections
54
 
55
- ------------------------------------------------------
56
- AMENITY ICONS
57
- ------------------------------------------------------
58
- When user requests "icons" or "emojis" for amenities, respond with this format:
 
 
 
 
59
 
60
- 📶 Wifi | 🅿️ Parking | 🛋️ Furnished | 🧼 Washing Machine | 🔥 Dryer | 🏞️ Balcony | 🏊 Pool | 💪 Gym | 🌿 Garden | ❄️ Air Conditioning | 🍳 Kitchen
 
 
 
 
61
 
62
- Backend will automatically add icons to all amenities listed:
63
- - wifi 📶
64
- - parking 🅿️
65
- - furnished 🛋️
66
- - washing machine / washing → 🧼
67
- - dryer → 🔥
68
- - balcony → 🏞️
69
- - pool 🏊
70
- - gym 💪
71
- - garden 🌿
72
- - air conditioning / ac → ❄️
73
- - kitchen → 🍳
 
 
 
 
74
 
75
  ------------------------------------------------------
76
  LISTING CREATION - PROGRESSIVE COLLECTION
@@ -91,27 +104,26 @@ OPTIONAL FIELDS (Ask, but not required):
91
  AUTO-GENERATED:
92
  - title (AI generates from location, bedrooms, listing_type)
93
  - description (AI generates professional description)
94
- - currency (auto-detect from location: Lagos→NGN, Cotonou→XOF, etc.)
95
- - amenities_with_icons (backend adds icons to all amenities)
96
 
97
  LOCATION EXTRACTION:
98
  - Extract ONLY the city/area name
99
  - Ignore long descriptions
100
  - Examples:
101
- "calavi quartier zogbadje" location: "calavi"
102
- "VI in Lagos" location: "lagos"
103
- "Lekki, Lagos" location: "lagos"
104
 
105
  LISTING TYPE AUTO-DETECTION:
106
- - "for rent" / "monthly" / "yearly" rent
107
- - "short stay" / "nightly" / "daily" / "weekly" short-stay
108
- - "for sale" / "selling" sale
109
- - "roommate" / "sharing" / "flatmate" roommate
110
 
111
  PRICE TYPE AUTO-DETECTION:
112
- - "monthly" / "month" / "per month" / "mth" monthly
113
- - "nightly" / "night" / "per night" / "daily" / "day" nightly
114
- - "yearly" / "year" / "per year" / "annum" yearly
115
 
116
  PROGRESSIVE COLLECTION FLOW:
117
  1. User provides initial info (may be incomplete)
@@ -120,10 +132,10 @@ PROGRESSIVE COLLECTION FLOW:
120
  4. Ask for missing fields ONE AT A TIME
121
  5. User provides each field (or corrects previous ones)
122
  6. Append/update to existing fields
123
- 7. When ALL required fields complete → Ask for optional fields
124
- 8. When amenities/requirements collected → Generate DRAFT
125
  9. Show DRAFT preview to user
126
- 10. User can request changes (icons, updates, etc.) - PRESERVE all corrections
127
  11. User reviews and says "publish" to confirm
128
 
129
  ------------------------------------------------------
@@ -135,7 +147,7 @@ When user starts listing a property:
135
  2. Check for missing REQUIRED fields
136
  3. Ask missing fields one by one
137
  4. Build up state progressively
138
- 5. ⚠️ PRESERVE all corrections and changes
139
 
140
  Response format while collecting:
141
  {
@@ -150,7 +162,7 @@ Response format while collecting:
150
  "requirements": null,
151
  "status": "collecting",
152
  "missing_fields": ["amenities", "requirements"],
153
- "next_question": "Any amenities? (e.g., wifi, parking, balcony, pool, furnished, kitchen, dryer, garden, etc.)",
154
  "ai_reply": "Great! I have: 2-bed in Lagos, 50k/month. Any amenities?"
155
  }
156
 
@@ -171,71 +183,8 @@ When ALL required fields complete:
171
  "draft_preview": null
172
  }
173
 
174
- When amenities/requirements provided:
175
- {
176
- "intent": "list",
177
- "status": "draft_ready",
178
- "ai_reply": "Perfect! Let me prepare your listing draft...",
179
- "draft_preview": {
180
- ...listing data...
181
- "amenities_with_icons": "📶 Wifi | 🅿️ Parking | 🧼 Washing Machine | 🔥 Dryer"
182
- }
183
- }
184
-
185
- ------------------------------------------------------
186
- EXAMPLES - LISTING CREATION
187
- ------------------------------------------------------
188
-
189
- User: "I want to list my 2-bed apartment in Lagos for rent, 50k monthly"
190
- {
191
- "intent": "list",
192
- "location": "lagos",
193
- "bedrooms": 2,
194
- "bathrooms": null,
195
- "price": 50000,
196
- "listing_type": "rent",
197
- "price_type": "monthly",
198
- "amenities": [],
199
- "requirements": null,
200
- "status": "collecting",
201
- "missing_fields": ["bathrooms"],
202
- "next_question": "How many bathrooms?",
203
- "ai_reply": "Got it! 2-bed in Lagos, 50k/month. How many bathrooms?"
204
- }
205
-
206
- User: "1 bathroom, with wifi and parking"
207
- {
208
- "intent": "list",
209
- "location": "lagos",
210
- "bedrooms": 2,
211
- "bathrooms": 1,
212
- "price": 50000,
213
- "listing_type": "rent",
214
- "price_type": "monthly",
215
- "amenities": ["wifi", "parking"],
216
- "requirements": null,
217
- "status": "checking_optional",
218
- "missing_fields": [],
219
- "ai_reply": "Perfect! Any special requirements for renters?",
220
- }
221
-
222
- User: "actually it's in Calavi not Lagos"
223
- {
224
- "intent": "list",
225
- "location": "calavi",
226
- "bedrooms": 2,
227
- "bathrooms": 1,
228
- "price": 50000,
229
- "listing_type": "rent",
230
- "price_type": "monthly",
231
- "amenities": ["wifi", "parking"],
232
- "requirements": null,
233
- "status": "checking_optional",
234
- "ai_reply": "Got it! Updated to Calavi. Any special requirements for renters?",
235
- }
236
-
237
  ------------------------------------------------------
238
- SEARCH INTENT (SAME AS BEFORE)
239
  ------------------------------------------------------
240
  When user searches, extract and COMBINE ALL mentioned filters:
241
  - location: city/area (handle typos)
@@ -270,11 +219,13 @@ CRITICAL RULES
270
  4. For listing creation: collect required fields progressively
271
  5. For search: extract all filters at once
272
  6. Return ONLY JSON (no markdown, no preamble)
273
- 7. When all required fields complete → ask for optional fields ONLY ONCE
274
- 8. When optional fields provided → show draft preview
275
  9. Wait for "publish" command to save listing
276
- 10. ⚠️ PRESERVE user corrections - never revert them
277
- 11. When user requests amenity icons, add them to draft_preview
278
- 12. BRAND RULE: Always refer to yourself as AIDA, never mention other LLMs
279
- 13. BRAND RULE: Give credit to Lojiz team when asked about your creators
280
- 14. BRAND RULE: Stand your ground as a specialized real-estate AI, not a general-purpose model
 
 
 
1
  You are Aida, Lojiz's friendly real-estate AI assistant.
2
+ User role: {user_role} (landlord | renter | airbnb_host | roommate_seeker | seller | buyer)
3
 
4
  ------------------------------------------------------
5
+ BRAND IDENTITY
6
  ------------------------------------------------------
7
 
8
+ WHO YOU ARE:
9
  - You are AIDA, Lojiz's proprietary real-estate AI assistant
10
  - Do NOT mention DeepSeek, GPT, or any other LLM names
11
  - You are a standalone AI developed specifically for Lojiz
12
  - Never refer to yourself as based on other AI models
13
 
14
+ IF ASKED "WHO ARE YOU?":
15
+ "I'm Aida, Lojiz's AI real-estate assistant. I help landlords list properties, renters find homes, and more!"
16
 
17
+ IF ASKED "WHO DESIGNED YOU?":
18
  "I was designed and developed by the Lojiz team. Our talented team includes:
19
+ - Emeka Destiny Ebuka - Lead AI & ML Engineer/Developer
20
+ - Uche Samuel Chukwuemeka - Product Designer
 
21
  Together, they created me to make real-estate transactions easier and more accessible for everyone."
22
 
23
+ IF ASKED "ARE YOU AN LLM?":
24
+ "I'm Aida, Lojiz's specialized AI assistant. I'm not just a general-purpose AI - I'm specifically built to understand real-estate, help with property listings, and guide users through the Lojiz platform."
 
 
 
25
 
26
+ NEVER SAY:
27
+ - "I'm based on DeepSeek"
28
+ - "I'm powered by GPT"
29
+ - "I'm an LLM (Language Model)"
 
 
30
 
31
  ------------------------------------------------------
32
+ PERSONALITY
33
  ------------------------------------------------------
34
+ - Speak like a human, short and warm
35
+ - IGNORE typos and grammar mistakes. Understand intent anyway
36
+ - If user talks off-topic, reply casually and ask: "Would you like to list a property or search for one?"
37
+ - Only switch to JSON when property details are mentioned
38
+ - Be proud of being Aida - make it clear you're specialized in real-estate
39
 
40
  ------------------------------------------------------
41
+ ROLE-BASED BEHAVIOR
42
  ------------------------------------------------------
 
 
 
 
 
43
 
44
+ Detect and adapt to different user roles:
45
+
46
+ LANDLORD:
47
+ - Help list properties for rent or sale
48
+ - Focus on: location, price, amenities, tenant requirements
49
+ - Price types: monthly, yearly
50
+ - Listing types: rent, short-stay, sale
51
+
52
+ RENTER/TENANT:
53
+ - Help search for rental properties
54
+ - Focus on: budget, location, amenities needed
55
+ - Can also list rooms for roommate matching
56
+ - Listing types: roommate
57
 
58
+ AIRBNB HOST:
59
+ - Help list short-stay vacation properties
60
+ - Focus on: guest amenities, location convenience
61
+ - Price types: nightly, daily, weekly
62
+ - Listing type: short-stay
63
 
64
+ ROOMMATE SEEKER:
65
+ - Help find rooms to share or advertise spare rooms
66
+ - Focus on: compatibility, house rules, utilities included
67
+ - Price types: monthly, yearly
68
+ - Listing type: roommate
69
+
70
+ SELLER (Property Sale):
71
+ - Help list properties for sale
72
+ - Focus on: property condition, location, price
73
+ - Listing type: sale
74
+
75
+ BUYER:
76
+ - Help search for properties to purchase
77
+ - Focus on: budget range, location, property type
78
+
79
+ ------------------------------------------------------
80
+ PRESERVE USER CORRECTIONS
81
+ ------------------------------------------------------
82
+ IMPORTANT: If a user corrects ANY field (location, bedrooms, price, etc.), PRESERVE that correction
83
+ - Do NOT regenerate or revert corrections when showing previews
84
+ - Example: If user says "actually it's in Calavi not Cotonou", update state AND keep it that way
85
+ - When showing draft preview, always use the LATEST corrected values
86
+ - If a user requests changes, apply them WITHOUT reverting previous corrections
87
 
88
  ------------------------------------------------------
89
  LISTING CREATION - PROGRESSIVE COLLECTION
 
104
  AUTO-GENERATED:
105
  - title (AI generates from location, bedrooms, listing_type)
106
  - description (AI generates professional description)
107
+ - currency (auto-detect from location: Lagos=NGN, Cotonou=XOF, etc.)
 
108
 
109
  LOCATION EXTRACTION:
110
  - Extract ONLY the city/area name
111
  - Ignore long descriptions
112
  - Examples:
113
+ "calavi quartier zogbadje" -> location: "calavi"
114
+ "VI in Lagos" -> location: "lagos"
115
+ "Lekki, Lagos" -> location: "lagos"
116
 
117
  LISTING TYPE AUTO-DETECTION:
118
+ - "for rent" / "monthly" / "yearly" -> rent
119
+ - "short stay" / "nightly" / "daily" / "weekly" -> short-stay
120
+ - "for sale" / "selling" -> sale
121
+ - "roommate" / "sharing" / "flatmate" -> roommate
122
 
123
  PRICE TYPE AUTO-DETECTION:
124
+ - "monthly" / "month" / "per month" / "mth" -> monthly
125
+ - "nightly" / "night" / "per night" / "daily" / "day" -> nightly
126
+ - "yearly" / "year" / "per year" / "annum" -> yearly
127
 
128
  PROGRESSIVE COLLECTION FLOW:
129
  1. User provides initial info (may be incomplete)
 
132
  4. Ask for missing fields ONE AT A TIME
133
  5. User provides each field (or corrects previous ones)
134
  6. Append/update to existing fields
135
+ 7. When ALL required fields complete -> Ask for optional fields
136
+ 8. When amenities/requirements collected -> Generate DRAFT
137
  9. Show DRAFT preview to user
138
+ 10. User can request changes - PRESERVE all corrections
139
  11. User reviews and says "publish" to confirm
140
 
141
  ------------------------------------------------------
 
147
  2. Check for missing REQUIRED fields
148
  3. Ask missing fields one by one
149
  4. Build up state progressively
150
+ 5. PRESERVE all corrections and changes
151
 
152
  Response format while collecting:
153
  {
 
162
  "requirements": null,
163
  "status": "collecting",
164
  "missing_fields": ["amenities", "requirements"],
165
+ "next_question": "Any amenities? (e.g., wifi, parking, balcony, pool, furnished, kitchen)",
166
  "ai_reply": "Great! I have: 2-bed in Lagos, 50k/month. Any amenities?"
167
  }
168
 
 
183
  "draft_preview": null
184
  }
185
 
 
 
 
 
 
186
  ------------------------------------------------------
187
+ SEARCH INTENT
188
  ------------------------------------------------------
189
  When user searches, extract and COMBINE ALL mentioned filters:
190
  - location: city/area (handle typos)
 
219
  4. For listing creation: collect required fields progressively
220
  5. For search: extract all filters at once
221
  6. Return ONLY JSON (no markdown, no preamble)
222
+ 7. When all required fields complete -> ask for optional fields ONLY ONCE
223
+ 8. When optional fields provided -> show draft preview
224
  9. Wait for "publish" command to save listing
225
+ 10. PRESERVE user corrections - never revert them
226
+ 11. BRAND RULE: Always refer to yourself as AIDA, never mention other LLMs
227
+ 12. BRAND RULE: Give credit to Lojiz team when asked about your creators
228
+ 13. BRAND RULE: Stand your ground as a specialized real-estate AI
229
+ 14. ROLE RULE: Detect and adapt to user role
230
+ 15. ROLE RULE: Adjust required fields based on user role
231
+ 16. ROLE RULE: Use role-specific language and focus areas
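
The LISTING TYPE and PRICE TYPE auto-detection rules above are mirrored in code by the `infer_listing_type` helper added to `intent_node.py` in this commit. A minimal sketch of that mapping, with the confidence values taken from the helper (`listing_type_from_price_type` is a hypothetical name used only here):

```python
# price_type -> listing_type table described by the AUTO-DETECTION rules;
# infer_listing_type() in intent_node.py applies the same mapping.
SHORT_STAY_TERMS = {"nightly", "daily", "weekly", "night", "day", "week"}
RENT_TERMS = {"monthly", "yearly", "month", "year", "mth", "yr"}

def listing_type_from_price_type(price_type: str | None) -> tuple[str, float]:
    if not price_type:
        return "rent", 0.5  # fallback when nothing can be inferred
    term = price_type.lower().strip()
    if term in SHORT_STAY_TERMS:
        return "short-stay", 0.95
    if term in RENT_TERMS:
        return "rent", 0.95
    return "rent", 0.5
```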
app/ai/routes/chat.py CHANGED
@@ -1,4 +1,4 @@
1
- # app/ai/routes/chat.py - Enhanced with Observability & Rate Limiting
2
  from fastapi import APIRouter, Depends, HTTPException, Request
3
  from fastapi.security import HTTPBearer
4
  from pydantic import BaseModel
@@ -34,6 +34,25 @@ class AskBody(BaseModel):
34
  user_role: Optional[str] = None
35
  history: Optional[List[MessageHistory]] = None
36
 
 
 
 
 
 
37
  # ============================================================
38
  # Enhanced Chat Endpoint
39
  # ============================================================
@@ -45,14 +64,7 @@ async def ask_ai(
45
  request: Request,
46
  token: str = Depends(security),
47
  ):
48
- """
49
- Enhanced chat endpoint with:
50
- - Rate limiting (token bucket)
51
- - Distributed tracing
52
- - Token tracking
53
- - Error handling with observability
54
- - Context management
55
- """
56
 
57
  start_time = time.time()
58
  request_id = request.headers.get("x-request-id", "unknown")
@@ -67,7 +79,7 @@ async def ask_ai(
67
  }
68
  ) as root_span:
69
  try:
70
- # ===== Step 1: Validate Token =====
71
  with trace_operation("token_validation"):
72
  payload = decode_access_token(token.credentials)
73
  if not payload:
@@ -76,7 +88,7 @@ async def ask_ai(
76
  user_id = payload["user_id"]
77
  user_role = payload.get("role", "renter")
78
 
79
- # ===== Step 2: Rate Limiting =====
80
  with trace_operation(
81
  "rate_limit_check",
82
  {"user_id": user_id, "operation": "chat"}
@@ -91,20 +103,46 @@ async def ask_ai(
91
 
92
  if not is_allowed:
93
  logger.warning(
94
- f"🚫 Rate limit exceeded for user: {user_id}",
95
  extra={"rate_info": rate_info}
96
  )
97
  raise RateLimitExceeded(retry_after=60)
98
 
99
- # Add rate limit headers
100
  root_span.set_attribute("rate_limit.remaining", rate_info["user"]["remaining"])
101
  root_span.set_attribute("rate_limit.capacity", rate_info["user"]["capacity"])
102
 
103
- # ===== Step 3: Context Management =====
 
 
 
 
 
 
104
  with trace_operation("context_management", {"user_id": user_id}):
105
  window = get_message_window(user_id)
106
 
107
- # Build conversation context from history (if provided)
108
  conversation_context = ""
109
  if body.history:
110
  for msg in body.history:
@@ -122,7 +160,7 @@ async def ask_ai(
122
  # Add to message window
123
  window.add_message("user", full_message)
124
 
125
- # ===== Step 4: AI Chat Processing =====
126
  with trace_operation(
127
  "aida_chat_sync",
128
  {
@@ -137,9 +175,8 @@ async def ask_ai(
137
  full_message,
138
  )
139
 
140
- # ===== Step 5: Token Tracking =====
141
  with trace_operation("token_tracking"):
142
- # Track tokens if available
143
  usage = final_state.get("token_usage", {})
144
  if usage:
145
  tracker = get_token_tracker()
@@ -151,7 +188,7 @@ async def ask_ai(
151
  usage.get("cost", 0.0),
152
  )
153
 
154
- # ===== Step 6: Build Response =====
155
  response = {
156
  "success": True,
157
  "text": final_state.get("ai_reply", ""),
@@ -162,11 +199,12 @@ async def ask_ai(
162
  "request_id": request_id,
163
  "processing_time_ms": int((time.time() - start_time) * 1000),
164
  "user_id": user_id,
 
165
  "status": final_state.get("status"),
166
  },
167
  }
168
 
169
- # ===== Step 7: Add Message to Window =====
170
  with trace_operation("window_update"):
171
  window.add_message("assistant", final_state.get("ai_reply", ""))
172
 
@@ -179,9 +217,10 @@ async def ask_ai(
179
  })
180
 
181
  logger.info(
182
- f"Chat processed successfully",
183
  extra={
184
  "user_id": user_id,
 
185
  "request_id": request_id,
186
  "processing_time_ms": response["metadata"]["processing_time_ms"],
187
  "has_cards": len(response["cards"]) > 0,
@@ -192,7 +231,7 @@ async def ask_ai(
192
 
193
  except RateLimitExceeded as e:
194
  root_span.set_attribute("error.type", "rate_limit_exceeded")
195
- logger.error(f"🚫 Rate limit: {str(e)}")
196
  raise HTTPException(
197
  status_code=429,
198
  detail=e.message,
@@ -206,7 +245,7 @@ async def ask_ai(
206
  root_span.record_exception(e)
207
  root_span.set_attribute("error.type", type(e).__name__)
208
  logger.error(
209
- f"Chat endpoint error: {str(e)}",
210
  exc_info=True,
211
  extra={"user_id": user_id if 'user_id' in locals() else "unknown"}
212
  )
@@ -248,7 +287,7 @@ async def get_rate_limit_status(
248
  }
249
 
250
  # ============================================================
251
- # Health Check with Rate Limiter
252
  # ============================================================
253
 
254
  @router.get("/health")
@@ -264,5 +303,6 @@ async def chat_health() -> dict:
264
  "token_tracking": True,
265
  "context_management": True,
266
  "error_resilience": True,
 
267
  },
268
  }
 
1
+ # app/ai/routes/chat.py - Enhanced with Role Detection
2
  from fastapi import APIRouter, Depends, HTTPException, Request
3
  from fastapi.security import HTTPBearer
4
  from pydantic import BaseModel
 
34
  user_role: Optional[str] = None
35
  history: Optional[List[MessageHistory]] = None
36
 
37
+ # ============================================================
38
+ # User Role Detection Helper
39
+ # ============================================================
40
+
41
+ async def detect_user_role_and_model(user_message: str):
42
+ """Detect user role and rental model from message"""
43
+ try:
44
+ from app.ml.models.user_role_context_handler import UserRoleDetector
45
+
46
+ role_detector = UserRoleDetector()
47
+ detected_role, role_confidence = role_detector.detect_user_role(user_message.lower())
48
+ detected_model = role_detector.detect_rental_model(user_message.lower(), None)
49
+
50
+ return detected_role, role_confidence, detected_model
51
+ except Exception as e:
52
+ logger.warning(f"Role detection failed: {e}")
53
+ return None, 0.0, None
54
+
55
+
56
  # ============================================================
57
  # Enhanced Chat Endpoint
58
  # ============================================================
 
64
  request: Request,
65
  token: str = Depends(security),
66
  ):
67
+ """Enhanced chat endpoint with rate limiting and role detection"""
 
 
 
 
 
 
 
68
 
69
  start_time = time.time()
70
  request_id = request.headers.get("x-request-id", "unknown")
 
79
  }
80
  ) as root_span:
81
  try:
82
+ # Step 1: Validate Token
83
  with trace_operation("token_validation"):
84
  payload = decode_access_token(token.credentials)
85
  if not payload:
 
88
  user_id = payload["user_id"]
89
  user_role = payload.get("role", "renter")
90
 
91
+ # Step 2: Rate Limiting
92
  with trace_operation(
93
  "rate_limit_check",
94
  {"user_id": user_id, "operation": "chat"}
 
103
 
104
  if not is_allowed:
105
  logger.warning(
106
+ f"Rate limit exceeded for user: {user_id}",
107
  extra={"rate_info": rate_info}
108
  )
109
  raise RateLimitExceeded(retry_after=60)
110
 
 
111
  root_span.set_attribute("rate_limit.remaining", rate_info["user"]["remaining"])
112
  root_span.set_attribute("rate_limit.capacity", rate_info["user"]["capacity"])
113
 
114
+ # Step 2b: User Role Detection (NEW)
115
+ with trace_operation("user_role_detection"):
116
+ try:
117
+ detected_role, role_confidence, detected_model = await detect_user_role_and_model(
118
+ body.message
119
+ )
120
+
121
+ if detected_role and role_confidence > 0.7:
122
+ logger.info(
123
+ f"User role detected",
124
+ extra={
125
+ "detected_role": detected_role,
126
+ "confidence": role_confidence,
127
+ "rental_model": detected_model.value if detected_model else None,
128
+ "original_role": user_role,
129
+ }
130
+ )
131
+ user_role = detected_role
132
+ root_span.set_attribute("detected_role", detected_role)
133
+ root_span.set_attribute("rental_model", detected_model.value if detected_model else None)
134
+ else:
135
+ logger.debug(f"Role detection inconclusive (confidence: {role_confidence:.0%})")
136
+ root_span.set_attribute("role_confidence", role_confidence)
137
+
138
+ except Exception as e:
139
+ logger.warning(f"Role detection failed: {e}")
140
+
141
+ # Step 3: Context Management
142
  with trace_operation("context_management", {"user_id": user_id}):
143
  window = get_message_window(user_id)
144
 
145
+ # Build conversation context from history
146
  conversation_context = ""
147
  if body.history:
148
  for msg in body.history:
 
160
  # Add to message window
161
  window.add_message("user", full_message)
162
 
163
+ # Step 4: AI Chat Processing
164
  with trace_operation(
165
  "aida_chat_sync",
166
  {
 
175
  full_message,
176
  )
177
 
178
+ # Step 5: Token Tracking
179
  with trace_operation("token_tracking"):
 
180
  usage = final_state.get("token_usage", {})
181
  if usage:
182
  tracker = get_token_tracker()
 
188
  usage.get("cost", 0.0),
189
  )
190
 
191
+ # Step 6: Build Response
192
  response = {
193
  "success": True,
194
  "text": final_state.get("ai_reply", ""),
 
199
  "request_id": request_id,
200
  "processing_time_ms": int((time.time() - start_time) * 1000),
201
  "user_id": user_id,
202
+ "user_role": user_role,
203
  "status": final_state.get("status"),
204
  },
205
  }
206
 
207
+ # Step 7: Add Message to Window
208
  with trace_operation("window_update"):
209
  window.add_message("assistant", final_state.get("ai_reply", ""))
210
 
 
217
  })
218
 
219
  logger.info(
220
+ f"Chat processed successfully",
221
  extra={
222
  "user_id": user_id,
223
+ "user_role": user_role,
224
  "request_id": request_id,
225
  "processing_time_ms": response["metadata"]["processing_time_ms"],
226
  "has_cards": len(response["cards"]) > 0,
 
231
 
232
  except RateLimitExceeded as e:
233
  root_span.set_attribute("error.type", "rate_limit_exceeded")
234
+ logger.error(f"Rate limit: {str(e)}")
235
  raise HTTPException(
236
  status_code=429,
237
  detail=e.message,
 
245
  root_span.record_exception(e)
246
  root_span.set_attribute("error.type", type(e).__name__)
247
  logger.error(
248
+ f"Chat endpoint error: {str(e)}",
249
  exc_info=True,
250
  extra={"user_id": user_id if 'user_id' in locals() else "unknown"}
251
  )
 
287
  }
288
 
289
  # ============================================================
290
+ # Health Check
291
  # ============================================================
292
 
293
  @router.get("/health")
 
303
  "token_tracking": True,
304
  "context_management": True,
305
  "error_resilience": True,
306
+ "user_role_detection": True,
307
  },
308
  }
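
A minimal sketch (not part of the commit) of the Step 2b override applied in this endpoint: the role detected from the message replaces the JWT role only when confidence clears the 0.7 bar used above. `resolve_user_role` is a hypothetical helper name; it reuses the `detect_user_role_and_model` function defined in this file.

```python
# Illustrative only: mirrors the Step 2b logic in ask_ai().
async def resolve_user_role(jwt_role: str, message: str) -> str:
    detected_role, confidence, _rental_model = await detect_user_role_and_model(message)
    if detected_role and confidence > 0.7:
        return detected_role  # high-confidence detection overrides the signup role
    return jwt_role  # otherwise keep the role carried by the access token
```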
app/ai/services/dynamic_role_manager.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
1
+ # app/ai/services/dynamic_role_manager.py - NEW
2
+ """
3
+ Dynamic Role Management System
4
+ Maps user's base role (landlord/renter) to specific roles based on listing/search type
5
+ """
6
+
7
+ import logging
8
+ from typing import Tuple
9
+ from enum import Enum
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ class BaseRole(Enum):
15
+ """User's base role at signup"""
16
+ LANDLORD = "landlord"
17
+ RENTER = "renter"
18
+
19
+
20
+ class DynamicRole(Enum):
21
+ """Dynamic roles based on action/listing type"""
22
+ # Landlord-derived roles
23
+ LANDLORD = "landlord" # Creating rent listings
24
+ HOST = "host" # Creating short-stay listings
25
+ SELLER = "seller" # Creating sale listings
26
+
27
+ # Renter-derived roles
28
+ RENTER = "renter" # Searching for rentals
29
+ GUEST = "guest" # Searching for short-stay
30
+ BUYER = "buyer" # Searching for properties to buy
31
+ ROOMMATE_SEEKER = "roommate_seeker" # Searching for rooms
32
+ ROOMMATE_LISTER = "roommate_lister" # Listing rooms (only renters)
33
+
34
+
35
+ class DynamicRoleManager:
36
+ """Manage dynamic role assignment based on user action"""
37
+
38
+ def __init__(self):
39
+ logger.info("Dynamic Role Manager initialized")
40
+
41
+ def get_dynamic_role_for_listing(
42
+ self,
43
+ base_role: str,
44
+ listing_type: str
45
+ ) -> Tuple[str, str, float]:
46
+ """
47
+ Get dynamic role when user creates a LISTING
48
+
49
+ Args:
50
+ base_role: User's base role at signup (landlord or renter)
51
+ listing_type: Type of listing (rent, short-stay, sale, roommate)
52
+
53
+ Returns:
54
+ (dynamic_role, description, confidence)
55
+ """
56
+
57
+ base_role_lower = base_role.lower().strip()
58
+ listing_type_lower = listing_type.lower().strip() if listing_type else None
59
+
60
+ logger.info(f"Getting listing role: base={base_role_lower}, type={listing_type_lower}")
61
+
62
+ # LANDLORD creating listings
63
+ if base_role_lower == "landlord":
64
+
65
+ # Landlord + short-stay = HOST
66
+ if listing_type_lower == "short-stay":
67
+ logger.info("Landlord creating short-stay -> HOST role")
68
+ return DynamicRole.HOST.value, "Short-stay property host", 0.99
69
+
70
+ # Landlord + sale = SELLER
71
+ elif listing_type_lower == "sale":
72
+ logger.info("Landlord creating sale -> SELLER role")
73
+ return DynamicRole.SELLER.value, "Property seller", 0.99
74
+
75
+ # Landlord + rent = LANDLORD (standard)
76
+ elif listing_type_lower == "rent":
77
+ logger.info("Landlord creating rent -> LANDLORD role")
78
+ return DynamicRole.LANDLORD.value, "Rental property landlord", 0.99
79
+
80
+ # Landlord cannot create roommate listings
81
+ elif listing_type_lower == "roommate":
82
+ logger.warning("Landlord cannot create roommate listings")
83
+ return None, "Landlords cannot list for roommate matching", 0.0
84
+
85
+ # RENTER creating listings
86
+ elif base_role_lower == "renter":
87
+
88
+ # Only renters can create roommate listings
89
+ if listing_type_lower == "roommate":
90
+ logger.info("Renter creating roommate -> ROOMMATE_LISTER role")
91
+ return DynamicRole.ROOMMATE_LISTER.value, "Room share lister", 0.99
92
+
93
+ # Renters cannot create rent/short-stay/sale listings
94
+ else:
95
+ logger.warning(f"Renter cannot create {listing_type_lower} listings")
96
+ return None, "Renters can only list rooms for roommate matching", 0.0
97
+
98
+ logger.warning(f"Unknown base role: {base_role_lower}")
99
+ return None, "Unknown role", 0.0
100
+
101
+ def get_dynamic_role_for_search(
102
+ self,
103
+ base_role: str,
104
+ search_for: str = None
105
+ ) -> Tuple[str, str, float]:
106
+ """
107
+ Get dynamic role when user SEARCHES for properties
108
+
109
+ Args:
110
+ base_role: User's base role at signup (landlord or renter)
111
+ search_for: What they're searching for (rent, short-stay, sale, roommate)
112
+
113
+ Returns:
114
+ (dynamic_role, description, confidence)
115
+ """
116
+
117
+ base_role_lower = base_role.lower().strip()
118
+ search_for_lower = search_for.lower().strip() if search_for else None
119
+
120
+ logger.info(f"Getting search role: base={base_role_lower}, search_for={search_for_lower}")
121
+
122
+ # LANDLORD searching
123
+ if base_role_lower == "landlord":
124
+ logger.warning("Landlords searching for properties - unusual but allowed")
125
+
126
+ if search_for_lower == "short-stay":
127
+ return DynamicRole.GUEST.value, "Short-stay guest (unusual)", 0.5
128
+ elif search_for_lower == "sale":
129
+ return DynamicRole.BUYER.value, "Property buyer (unusual)", 0.5
130
+ else:
131
+ return DynamicRole.RENTER.value, "Searching for rentals (unusual)", 0.5
132
+
133
+ # RENTER searching
134
+ elif base_role_lower == "renter":
135
+
136
+ # Searching for rentals = RENTER
137
+ if search_for_lower == "rent":
138
+ logger.info("Renter searching for rent -> RENTER role")
139
+ return DynamicRole.RENTER.value, "Rental searcher", 0.99
140
+
141
+ # Searching for short-stay = GUEST
142
+ elif search_for_lower == "short-stay":
143
+ logger.info("Renter searching for short-stay -> GUEST role")
144
+ return DynamicRole.GUEST.value, "Short-stay guest", 0.99
145
+
146
+ # Searching for sale = BUYER
147
+ elif search_for_lower == "sale":
148
+ logger.info("Renter searching for sale -> BUYER role")
149
+ return DynamicRole.BUYER.value, "Property buyer", 0.99
150
+
151
+ # Searching for roommate = ROOMMATE_SEEKER
152
+ elif search_for_lower == "roommate":
153
+ logger.info("Renter searching for roommate -> ROOMMATE_SEEKER role")
154
+ return DynamicRole.ROOMMATE_SEEKER.value, "Roommate seeker", 0.99
155
+
156
+ # Default search = RENTER
157
+ else:
158
+ logger.info("Renter searching (unspecified) -> RENTER role")
159
+ return DynamicRole.RENTER.value, "Property searcher", 0.95
160
+
161
+ logger.warning(f"Unknown base role: {base_role_lower}")
162
+ return None, "Unknown role", 0.0
163
+
164
+ def get_ai_prompt_for_role(self, dynamic_role: str) -> str:
165
+ """
166
+ Get AI prompt context for specific dynamic role
167
+
168
+ Args:
169
+ dynamic_role: The dynamic role (e.g., "host", "seller", "guest")
170
+
171
+ Returns:
172
+ Prompt context string for AI
173
+ """
174
+
175
+ prompts = {
176
+ "landlord": """
177
+ You are helping a landlord list a rental property.
178
+ Focus on: location, monthly/yearly price, bedrooms, bathrooms, furnished/unfurnished, utilities
179
+ Help collect: location, price (monthly), bedrooms, bathrooms, amenities, requirements for tenants
180
+ Pricing context: Monthly or yearly rental rates
181
+ """,
182
+
183
+ "host": """
184
+ You are helping a short-stay property host (Airbnb-style).
185
+ Focus on: location, nightly/daily price, guest amenities, cleanliness, WiFi, kitchen, parking
186
+ Help collect: location, price (nightly), bedrooms, bathrooms, amenities, house rules
187
+ Pricing context: Nightly, daily, or weekly rates
188
+ Emphasize: Guest experience, cleanliness, quick check-in/out, amenities
189
+ """,
190
+
191
+ "seller": """
192
+ You are helping someone sell a property.
193
+ Focus on: location, total sale price, property condition, bedrooms, bathrooms, unique features
194
+ Help collect: location, price (fixed), bedrooms, bathrooms, property type, amenities
195
+ Pricing context: Total sale price (not per month/night)
196
+ Emphasize: Investment potential, property condition, neighborhood, documentation status
197
+ """,
198
+
199
+ "renter": """
200
+ You are helping a renter find a rental property.
201
+ Focus on: budget, location, bedrooms, bathrooms, move-in date, lease terms
202
+ Ask questions about: Budget range, location preference, must-have amenities
203
+ Show filters for: Monthly price range, bedrooms, bathrooms, furnished/unfurnished
204
+ Emphasize: Affordability, proximity to work/school, security, utilities included
205
+ """,
206
+
207
+ "guest": """
208
+ You are helping someone find a short-stay property (Airbnb-style).
209
+ Focus on: budget, location, dates, guest amenities, proximity to attractions
210
+ Ask questions about: Check-in date, check-out date, budget per night, location preference
211
+ Show filters for: Nightly price range, bedrooms, host reviews, amenities
212
+ Emphasize: Guest reviews, cleanliness, host responsiveness, location convenience
213
+ """,
214
+
215
+ "buyer": """
216
+ You are helping someone buy a property.
217
+ Focus on: budget, location, property type, bedrooms, bathrooms, investment potential
218
+ Ask questions about: Total budget, location preference, property type, timeline
219
+ Show filters for: Price range, bedrooms, bathrooms, neighborhood, property type
220
+ Emphasize: Investment returns, property condition, financing options, neighborhood potential
221
+ """,
222
+
223
+ "roommate_seeker": """
224
+ You are helping someone find a room to share with a roommate.
225
+ Focus on: budget, location, roommate compatibility, house rules, utilities included
226
+ Ask questions about: Budget, location, move-in date, roommate preferences
227
+ Show filters for: Monthly budget, bedrooms available, location, utilities included
228
+ Emphasize: Affordable housing, roommate compatibility, house rules, community
229
+ """,
230
+
231
+ "roommate_lister": """
232
+ You are helping a renter list a room for roommate matching.
233
+ Focus on: location, monthly price, available rooms, house rules, utilities included
234
+ Help collect: location, price (monthly), rooms available, bathrooms, house rules, amenities
235
+ Pricing context: Monthly rates split between roommates
236
+ Emphasize: Roommate compatibility, house culture, shared amenities, community
237
+ """,
238
+ }
239
+
240
+ return prompts.get(dynamic_role, prompts["renter"])
241
+
242
+ def validate_role_action(
243
+ self,
244
+ base_role: str,
245
+ action: str,
246
+ listing_or_search_type: str
247
+ ) -> Tuple[bool, str]:
248
+ """
249
+ Validate if user (base role) can perform action with given type
250
+
251
+ Args:
252
+ base_role: User's base role (landlord or renter)
253
+ action: What user wants to do (list, search)
254
+ listing_or_search_type: Type (rent, short-stay, sale, roommate)
255
+
256
+ Returns:
257
+ (is_allowed, reason)
258
+ """
259
+
260
+ base_role_lower = base_role.lower().strip()
261
+ action_lower = action.lower().strip()
262
+ type_lower = listing_or_search_type.lower().strip()
263
+
264
+ logger.info(f"Validating: {base_role_lower} {action_lower} {type_lower}")
265
+
266
+ # LANDLORD rules for LISTING
267
+ if base_role_lower == "landlord" and action_lower == "list":
268
+ if type_lower in ["rent", "short-stay", "sale"]:
269
+ return True, f"Landlord can list {type_lower}"
270
+ elif type_lower == "roommate":
271
+ return False, "Landlords cannot list for roommate matching. Only renters can share rooms."
272
+ else:
273
+ return False, f"Unknown listing type: {type_lower}"
274
+
275
+ # RENTER rules for LISTING
276
+ if base_role_lower == "renter" and action_lower == "list":
277
+ if type_lower == "roommate":
278
+ return True, "Renters can list rooms for roommate matching"
279
+ elif type_lower in ["rent", "short-stay", "sale"]:
280
+ return False, "Renters can only list rooms for roommate matching"
281
+ else:
282
+ return False, f"Unknown listing type: {type_lower}"
283
+
284
+ # SEARCH rules (both can search anything)
285
+ if action_lower == "search":
286
+ return True, f"Can search for {type_lower}"
287
+
288
+ return False, f"Invalid action: {action_lower}"
289
+
290
+
291
+ # Singleton
292
+ _manager = None
293
+
294
+ def get_dynamic_role_manager() -> DynamicRoleManager:
295
+ """Get or create singleton"""
296
+ global _manager
297
+ if _manager is None:
298
+ _manager = DynamicRoleManager()
299
+ return _manager
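A minimal usage sketch for the role manager above (illustrative only; the import path is an assumption, since this hunk does not show the module location):

    # Sketch: resolving a dynamic role and guarding an action (assumed import path).
    from app.ai.services.dynamic_role_manager import get_dynamic_role_manager

    manager = get_dynamic_role_manager()

    # A renter searching for a short stay resolves to the "guest" role.
    role, description, confidence = manager.get_dynamic_role_for_search(
        base_role="renter", search_for="short-stay"
    )

    # Block disallowed listing actions before starting field extraction.
    allowed, reason = manager.validate_role_action("renter", "list", "rent")
    if not allowed:
        print(reason)  # renters may only list rooms for roommate matching

    # Feed the resolved role back into the AI prompt.
    context = manager.get_ai_prompt_for_role(role)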
app/ml/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (180 Bytes). View file
 
app/ml/models/combined_training_data.csv ADDED
@@ -0,0 +1,4 @@
1
+ price,bedrooms,bathrooms,location,dataset_source,price_per_bedroom,price_per_bathroom,total_rooms,price_range
2
+ 250000,2,1,Unknown,dummy,125000.0,250000.0,3,mid
3
+ 350000,3,2,Unknown,dummy,116666.66666666667,175000.0,5,mid
4
+ 450000,4,3,Unknown,dummy,112500.0,150000.0,7,mid
app/ml/models/combined_training_data.jsonl ADDED
@@ -0,0 +1,3 @@
1
+ {"price": 250000, "bedrooms": 2, "bathrooms": 1, "location": "Unknown", "dataset_source": "dummy", "price_per_bedroom": 125000.0, "price_per_bathroom": 250000.0, "total_rooms": 3, "price_range": "mid"}
2
+ {"price": 350000, "bedrooms": 3, "bathrooms": 2, "location": "Unknown", "dataset_source": "dummy", "price_per_bedroom": 116666.66666666667, "price_per_bathroom": 175000.0, "total_rooms": 5, "price_range": "mid"}
3
+ {"price": 450000, "bedrooms": 4, "bathrooms": 3, "location": "Unknown", "dataset_source": "dummy", "price_per_bedroom": 112500.0, "price_per_bathroom": 150000.0, "total_rooms": 7, "price_range": "mid"}
app/ml/models/combined_training_data.parquet ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b21dc388814014d2ac5628bc908cd8d1eb26b796b80e32d085602570bb37e457
3
+ size 6104
app/ml/models/dataset_info.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "rows": 3,
3
+ "cols": [
4
+ "price",
5
+ "bedrooms",
6
+ "bathrooms",
7
+ "location",
8
+ "dataset_source",
9
+ "price_per_bedroom",
10
+ "price_per_bathroom",
11
+ "total_rooms",
12
+ "price_range"
13
+ ],
14
+ "price": {
15
+ "mean": 350000.0,
16
+ "median": 350000.0,
17
+ "min": 250000.0,
18
+ "max": 450000.0
19
+ },
20
+ "bedrooms": {
21
+ "mean": 3.0,
22
+ "min": 2,
23
+ "max": 4
24
+ },
25
+ "bathrooms": {
26
+ "mean": 2.0,
27
+ "min": 1,
28
+ "max": 3
29
+ },
30
+ "locations": 1
31
+ }
app/ml/models/field_models.pkl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ac017f49114480a299778b1190bcd6a30a30f723abcfae85e34b2331e73c7f
3
+ size 91689397
app/ml/models/ml_listing_extractor.py CHANGED
@@ -1,4 +1,4 @@
1
- # app/ml/ml_listing_extractor.py - Complete ML Extractor with Smart Inference
2
  import json
3
  import numpy as np
4
  from typing import Dict, List, Tuple, Optional
@@ -6,8 +6,6 @@ import logging
6
  import os
7
  import re
8
  from datetime import datetime, timedelta
9
- from sklearn.ensemble import RandomForestClassifier
10
- from sklearn.preprocessing import LabelEncoder
11
  import joblib
12
  from sentence_transformers import SentenceTransformer
13
  import aiohttp
@@ -28,7 +26,7 @@ class CurrencyManager:
28
  self.cache_expiry = {}
29
  self.cache_ttl = 86400 # 24 hours
30
 
31
- logger.info("💱 Currency Manager initialized")
32
 
33
  async def get_currency_for_location(self, location: str) -> Tuple[str, str, str, float]:
34
  """Get currency for a location using geolocation API"""
@@ -39,7 +37,7 @@ class CurrencyManager:
39
  if location_lower in self.location_cache:
40
  cached = self.location_cache[location_lower]
41
  if self._is_cache_valid(f"location_{location_lower}"):
42
- logger.info(f"Location cache hit: {location} {cached['city']} {cached['currency']}")
43
  return cached['currency'], cached['country'], cached['city'], 0.95
44
 
45
  try:
@@ -85,13 +83,13 @@ class CurrencyManager:
85
  }
86
  self._set_cache_expiry(f"location_{location_lower}")
87
 
88
- logger.info(f"Found: {location} {city}, {country} {currency}")
89
  return currency, country, city, 0.93
90
 
91
  except Exception as e:
92
- logger.warning(f"⚠️ Failed to get currency for location {location}: {e}")
93
 
94
- logger.warning(f"Could not determine currency for {location}")
95
  return None, None, location, 0.0
96
 
97
  async def _get_currency_for_country(self, country_name: str, country_code: str = None) -> Optional[str]:
@@ -111,7 +109,7 @@ class CurrencyManager:
111
  currencies = data[0].get('currencies', {})
112
  if currencies:
113
  currency_code = list(currencies.keys())[0]
114
- logger.info(f"Country {country_name} ({country_code}) {currency_code}")
115
  return currency_code
116
 
117
  except Exception as e:
@@ -131,7 +129,7 @@ class CurrencyManager:
131
  cache_key = f"{from_currency}_rates"
132
  if cache_key in self.exchange_rate_cache:
133
  if self._is_cache_valid(cache_key):
134
- logger.info(f"Exchange rate cache hit for {from_currency}")
135
  return self.exchange_rate_cache[cache_key]
136
 
137
  try:
@@ -152,11 +150,11 @@ class CurrencyManager:
152
  self.exchange_rate_cache[cache_key] = filtered_rates
153
  self._set_cache_expiry(cache_key)
154
 
155
- logger.info(f"Fetched exchange rates for {from_currency}")
156
  return filtered_rates
157
 
158
  except Exception as e:
159
- logger.error(f"Failed to fetch exchange rates: {e}")
160
 
161
  return {}
162
 
@@ -176,17 +174,17 @@ class CurrencyManager:
176
  self.location_cache.clear()
177
  self.exchange_rate_cache.clear()
178
  self.cache_expiry.clear()
179
- logger.info("🗑️ Currency caches cleared")
180
 
181
 
182
  class MLListingExtractor:
183
- """ML-powered field extractor with SMART INFERENCE"""
184
 
185
  def __init__(self, model_dir: str = "app/ml/models"):
186
- self.model_dir = model_dir # This will be "app/ml/models" by default
187
  os.makedirs(model_dir, exist_ok=True)
188
 
189
- logger.info("🚀 Loading ML Listing Extractor...")
190
 
191
  # Embedder for semantic similarity
192
  try:
@@ -201,7 +199,7 @@ class MLListingExtractor:
201
  # Currency manager with live APIs
202
  self.currency_mgr = CurrencyManager()
203
 
204
- # Field validators & confidence models
205
  self.field_models = self._load_field_models()
206
 
207
  # Learning history
@@ -209,11 +207,11 @@ class MLListingExtractor:
209
  self.field_patterns = {}
210
  self.user_corrections = {}
211
 
212
- logger.info("ML Extractor loaded with live currency APIs")
213
 
214
  def _load_field_models(self) -> Dict:
215
- """Load or create field ML models"""
216
- # Try multiple possible paths
217
  possible_paths = [
218
  f"{self.model_dir}/field_models.pkl",
219
  "app/ml/models/field_models.pkl",
@@ -224,13 +222,20 @@ class MLListingExtractor:
224
  if os.path.exists(models_file):
225
  try:
226
  models = joblib.load(models_file)
227
- logger.info(f"✅ Loaded field models from {models_file}")
 
 
 
 
 
228
  return models
 
229
  except Exception as e:
230
  logger.warning(f"Failed to load models from {models_file}: {e}")
231
 
232
- logger.info("⚠️ No field models found. Training is required.")
233
- logger.info("Run: python app/ml/trainning/train_complete_model.py")
 
234
 
235
  # Return empty models dict as fallback
236
  return {
@@ -239,59 +244,57 @@ class MLListingExtractor:
239
  "price_patterns": {},
240
  "amenity_frequencies": {},
241
  "embedder": None,
 
242
  }
243
 
244
- # ==================== SMART LISTING TYPE INFERENCE ====================
245
  def infer_listing_type(self, state: Dict, user_role: str = None, user_message: str = None) -> Tuple[str, float]:
246
- """Intelligently infer listing_type from price_type, user role, and message keywords"""
247
 
248
  explicit_type = state.get("listing_type")
249
  price_type = state.get("price_type")
250
 
251
- # 1️⃣ If explicitly stated, use it
252
  if explicit_type and explicit_type in ["rent", "short-stay", "sale", "roommate"]:
253
- logger.info(f"Listing type explicit: {explicit_type}")
254
  return explicit_type, 0.99
255
 
256
- # 2️⃣ User role matters FIRST
257
  if user_role:
258
  user_role_lower = user_role.lower().strip()
259
 
260
  # If user is renter, ALWAYS roommate
261
  if user_role_lower == "renter":
262
- logger.info(f"User is renter roommate listing")
263
  return "roommate", 0.98
264
 
265
  # If user is landlord, check other signals
266
  if user_role_lower == "landlord":
267
- # Check user message for sale keywords
268
  if user_message:
269
  msg_lower = user_message.lower()
270
  sale_keywords = ["sell", "sale", "selling", "for sale", "purchase", "buy"]
271
 
272
  if any(keyword in msg_lower for keyword in sale_keywords):
273
- logger.info(f"Detected sale keywords sale")
274
  return "sale", 0.95
275
 
276
- # 3️⃣ Infer from price_type (short-stay only indicator)
277
  if price_type:
278
  price_type_lower = price_type.lower().strip()
279
 
280
  # Short-stay indicators
281
  if price_type_lower in ["nightly", "daily", "weekly", "night", "day", "week"]:
282
- logger.info(f"Inferred short-stay from price_type: {price_type}")
283
  return "short-stay", 0.95
284
 
285
- # Monthly/Yearly = RENT (not sale!)
286
  elif price_type_lower in ["monthly", "yearly", "month", "year", "mth", "yr"]:
287
- logger.info(f"Inferred rent from price_type: {price_type}")
288
  return "rent", 0.95
289
 
290
- # 4️⃣ Default to rent
291
- logger.warning("⚠️ Could not infer listing_type, defaulting to rent")
292
  return "rent", 0.5
293
 
294
- # ==================== SMART CURRENCY INFERENCE ====================
295
  async def infer_currency(self, state: Dict) -> Tuple[str, str, float]:
296
  """Intelligently infer currency from location"""
297
 
@@ -299,29 +302,28 @@ class MLListingExtractor:
299
  location = state.get("location")
300
  listing_type = state.get("listing_type")
301
 
302
- # 1️⃣ If explicitly stated
303
  if explicit_currency and len(explicit_currency) == 3:
304
- logger.info(f"Currency explicit: {explicit_currency}")
305
  return explicit_currency, location, 0.99
306
 
307
- # 2️⃣ Short-stay always in USD
308
  if listing_type == "short-stay":
309
- logger.info(f"Short-stay detected, using USD")
310
  return "USD", location, 0.98
311
 
312
- # 3️⃣ Infer from location using Nominatim + REST Countries
313
  if location:
314
  currency, country, city, confidence = await self.currency_mgr.get_currency_for_location(location)
315
 
316
  if currency:
317
- logger.info(f"Extracted city: {location} {city}, {country} {currency}")
318
  state["location"] = city
319
  return currency, city, confidence
320
 
321
- logger.warning("⚠️ Could not infer currency, will ask user")
322
  return None, location, 0.0
323
 
324
- # ==================== PRICE CONVERSION FOR DISPLAY ====================
325
  async def convert_price_for_display(self, price: float, from_currency: str, to_currency: str = "USD") -> Dict:
326
  """Convert price using LIVE exchange rates"""
327
 
@@ -346,7 +348,7 @@ class MLListingExtractor:
346
  from_symbol = symbols.get(from_currency, from_currency)
347
  to_symbol = symbols.get(to_currency, to_currency)
348
 
349
- formatted = f"{from_symbol}{price:,} ({to_symbol}{display_price:,.2f})"
350
 
351
  return {
352
  "original_price": price,
@@ -358,7 +360,7 @@ class MLListingExtractor:
358
  }
359
 
360
  except Exception as e:
361
- logger.error(f"Failed to convert price: {e}")
362
 
363
  return {
364
  "original_price": price,
@@ -375,14 +377,14 @@ class MLListingExtractor:
375
 
376
  symbol_map = {
377
  "USD": "$",
378
- "EUR": "",
379
- "GBP": "£",
380
- "NGN": "",
381
- "XOF": "Fr",
382
- "KES": "Ks",
383
- "GHS": "",
384
- "ZAR": "R",
385
- "AED": "د.إ",
386
  }
387
 
388
  for currency in currencies:
@@ -446,15 +448,14 @@ class MLListingExtractor:
446
  "confidence": 0.93
447
  }
448
 
449
- logger.info(f"Extracted location: {address} {city}")
450
  return city, location_info
451
 
452
  except Exception as e:
453
- logger.error(f"Failed to extract location from address: {e}")
454
 
455
  return None, {}
456
 
457
- # ==================== FIELD VALIDATION ====================
458
  def validate_field(self, field_name: str, value: any, user_input: str, user_id: str = None) -> Dict:
459
  """Validate a single field"""
460
 
 
1
+ # app/ml/models/ml_listing_extractor.py - Complete ML Extractor
2
  import json
3
  import numpy as np
4
  from typing import Dict, List, Tuple, Optional
 
6
  import os
7
  import re
8
  from datetime import datetime, timedelta
 
 
9
  import joblib
10
  from sentence_transformers import SentenceTransformer
11
  import aiohttp
 
26
  self.cache_expiry = {}
27
  self.cache_ttl = 86400 # 24 hours
28
 
29
+ logger.info("Currency Manager initialized")
30
 
31
  async def get_currency_for_location(self, location: str) -> Tuple[str, str, str, float]:
32
  """Get currency for a location using geolocation API"""
 
37
  if location_lower in self.location_cache:
38
  cached = self.location_cache[location_lower]
39
  if self._is_cache_valid(f"location_{location_lower}"):
40
+ logger.info(f"Location cache hit: {location} -> {cached['city']} -> {cached['currency']}")
41
  return cached['currency'], cached['country'], cached['city'], 0.95
42
 
43
  try:
 
83
  }
84
  self._set_cache_expiry(f"location_{location_lower}")
85
 
86
+ logger.info(f"Found: {location} -> {city}, {country} -> {currency}")
87
  return currency, country, city, 0.93
88
 
89
  except Exception as e:
90
+ logger.warning(f"Failed to get currency for location {location}: {e}")
91
 
92
+ logger.warning(f"Could not determine currency for {location}")
93
  return None, None, location, 0.0
94
 
95
  async def _get_currency_for_country(self, country_name: str, country_code: str = None) -> Optional[str]:
 
109
  currencies = data[0].get('currencies', {})
110
  if currencies:
111
  currency_code = list(currencies.keys())[0]
112
+ logger.info(f"Country {country_name} ({country_code}) -> {currency_code}")
113
  return currency_code
114
 
115
  except Exception as e:
 
129
  cache_key = f"{from_currency}_rates"
130
  if cache_key in self.exchange_rate_cache:
131
  if self._is_cache_valid(cache_key):
132
+ logger.info(f"Exchange rate cache hit for {from_currency}")
133
  return self.exchange_rate_cache[cache_key]
134
 
135
  try:
 
150
  self.exchange_rate_cache[cache_key] = filtered_rates
151
  self._set_cache_expiry(cache_key)
152
 
153
+ logger.info(f"Fetched exchange rates for {from_currency}")
154
  return filtered_rates
155
 
156
  except Exception as e:
157
+ logger.error(f"Failed to fetch exchange rates: {e}")
158
 
159
  return {}
160
 
 
174
  self.location_cache.clear()
175
  self.exchange_rate_cache.clear()
176
  self.cache_expiry.clear()
177
+ logger.info("Currency caches cleared")
178
 
179
 
180
  class MLListingExtractor:
181
+ """ML-powered field extractor with SMART INFERENCE + TRAINED MODELS"""
182
 
183
  def __init__(self, model_dir: str = "app/ml/models"):
184
+ self.model_dir = model_dir
185
  os.makedirs(model_dir, exist_ok=True)
186
 
187
+ logger.info("Loading ML Listing Extractor...")
188
 
189
  # Embedder for semantic similarity
190
  try:
 
199
  # Currency manager with live APIs
200
  self.currency_mgr = CurrencyManager()
201
 
202
+ # Load trained field models
203
  self.field_models = self._load_field_models()
204
 
205
  # Learning history
 
207
  self.field_patterns = {}
208
  self.user_corrections = {}
209
 
210
+ logger.info("ML Extractor loaded with live currency APIs & trained models")
211
 
212
  def _load_field_models(self) -> Dict:
213
+ """Load trained field ML models from enhanced training"""
214
+
215
  possible_paths = [
216
  f"{self.model_dir}/field_models.pkl",
217
  "app/ml/models/field_models.pkl",
 
222
  if os.path.exists(models_file):
223
  try:
224
  models = joblib.load(models_file)
225
+
226
+ logger.info(f"Loaded trained field models from {models_file}")
227
+ logger.info(f" - location_classifier: {models.get('location_classifier') is not None}")
228
+ logger.info(f" - price_model: {models.get('price_model') is not None}")
229
+ logger.info(f" - price_patterns: {len(models.get('price_patterns', {}))} locations")
230
+
231
  return models
232
+
233
  except Exception as e:
234
  logger.warning(f"Failed to load models from {models_file}: {e}")
235
 
236
+ logger.info("No trained field models found.")
237
+ logger.info(" To train: python scripts/download_training_data.py")
238
+ logger.info(" Then: python scripts/train_models.py")
239
 
240
  # Return empty models dict as fallback
241
  return {
 
244
  "price_patterns": {},
245
  "amenity_frequencies": {},
246
  "embedder": None,
247
+ "scaler": None,
248
  }
249
 
 
250
  def infer_listing_type(self, state: Dict, user_role: str = None, user_message: str = None) -> Tuple[str, float]:
251
+ """Intelligently infer listing_type"""
252
 
253
  explicit_type = state.get("listing_type")
254
  price_type = state.get("price_type")
255
 
256
+ # 1. If explicitly stated, use it
257
  if explicit_type and explicit_type in ["rent", "short-stay", "sale", "roommate"]:
258
+ logger.info(f"Listing type explicit: {explicit_type}")
259
  return explicit_type, 0.99
260
 
261
+ # 2. User role matters FIRST
262
  if user_role:
263
  user_role_lower = user_role.lower().strip()
264
 
265
  # If user is renter, ALWAYS roommate
266
  if user_role_lower == "renter":
267
+ logger.info("User is renter -> roommate listing")
268
  return "roommate", 0.98
269
 
270
  # If user is landlord, check other signals
271
  if user_role_lower == "landlord":
 
272
  if user_message:
273
  msg_lower = user_message.lower()
274
  sale_keywords = ["sell", "sale", "selling", "for sale", "purchase", "buy"]
275
 
276
  if any(keyword in msg_lower for keyword in sale_keywords):
277
+ logger.info("Detected sale keywords -> sale")
278
  return "sale", 0.95
279
 
280
+ # 3. Infer from price_type
281
  if price_type:
282
  price_type_lower = price_type.lower().strip()
283
 
284
  # Short-stay indicators
285
  if price_type_lower in ["nightly", "daily", "weekly", "night", "day", "week"]:
286
+ logger.info(f"Inferred short-stay from price_type: {price_type}")
287
  return "short-stay", 0.95
288
 
289
+ # Monthly/Yearly = RENT
290
  elif price_type_lower in ["monthly", "yearly", "month", "year", "mth", "yr"]:
291
+ logger.info(f"Inferred rent from price_type: {price_type}")
292
  return "rent", 0.95
293
 
294
+ # 4. Default to rent
295
+ logger.warning("Could not infer listing_type, defaulting to rent")
296
  return "rent", 0.5
297
 
 
298
  async def infer_currency(self, state: Dict) -> Tuple[str, str, float]:
299
  """Intelligently infer currency from location"""
300
 
 
302
  location = state.get("location")
303
  listing_type = state.get("listing_type")
304
 
305
+ # 1. If explicitly stated
306
  if explicit_currency and len(explicit_currency) == 3:
307
+ logger.info(f"Currency explicit: {explicit_currency}")
308
  return explicit_currency, location, 0.99
309
 
310
+ # 2. Short-stay always in USD
311
  if listing_type == "short-stay":
312
+ logger.info("Short-stay detected, using USD")
313
  return "USD", location, 0.98
314
 
315
+ # 3. Infer from location
316
  if location:
317
  currency, country, city, confidence = await self.currency_mgr.get_currency_for_location(location)
318
 
319
  if currency:
320
+ logger.info(f"Extracted city: {location} -> {city}, {country} -> {currency}")
321
  state["location"] = city
322
  return currency, city, confidence
323
 
324
+ logger.warning("Could not infer currency, will ask user")
325
  return None, location, 0.0
326
 
 
327
  async def convert_price_for_display(self, price: float, from_currency: str, to_currency: str = "USD") -> Dict:
328
  """Convert price using LIVE exchange rates"""
329
 
 
348
  from_symbol = symbols.get(from_currency, from_currency)
349
  to_symbol = symbols.get(to_currency, to_currency)
350
 
351
+ formatted = f"{from_symbol}{price:,} (approx {to_symbol}{display_price:,.2f})"
352
 
353
  return {
354
  "original_price": price,
 
360
  }
361
 
362
  except Exception as e:
363
+ logger.error(f"Failed to convert price: {e}")
364
 
365
  return {
366
  "original_price": price,
 
377
 
378
  symbol_map = {
379
  "USD": "$",
380
+ "EUR": "EUR",
381
+ "GBP": "GBP",
382
+ "NGN": "NGN",
383
+ "XOF": "XOF",
384
+ "KES": "KES",
385
+ "GHS": "GHS",
386
+ "ZAR": "ZAR",
387
+ "AED": "AED",
388
  }
389
 
390
  for currency in currencies:
 
448
  "confidence": 0.93
449
  }
450
 
451
+ logger.info(f"Extracted location: {address} -> {city}")
452
  return city, location_info
453
 
454
  except Exception as e:
455
+ logger.error(f"Failed to extract location from address: {e}")
456
 
457
  return None, {}
458
 
 
459
  def validate_field(self, field_name: str, value: any, user_input: str, user_id: str = None) -> Dict:
460
  """Validate a single field"""
461
 
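For reference, a small async driver showing how the inference helpers in this file are typically exercised (a sketch only; it assumes the sentence-transformer weights and outbound network access the extractor expects are available):

    # Sketch: listing-type and currency inference (illustrative driver, not part of the commit).
    import asyncio
    from app.ml.models.ml_listing_extractor import MLListingExtractor

    async def main():
        extractor = MLListingExtractor()
        state = {"price_type": "nightly", "location": "Cotonou"}

        # A nightly price_type infers "short-stay" even without an explicit listing_type.
        listing_type, confidence = extractor.infer_listing_type(
            state, user_role="landlord", user_message="I want to list my flat"
        )
        state["listing_type"] = listing_type

        # Short-stay listings default to USD; other types fall back to the location APIs.
        currency, city, currency_confidence = await extractor.infer_currency(state)
        print(listing_type, currency, city)

    asyncio.run(main())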
app/ml/models/user_role_context_handler.py ADDED
@@ -0,0 +1,607 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ user_role_context_handler.py - Handle different user roles and rental models
4
+ Supports: Airbnb (host/guest), African rentals (landlord/renter/tenant)
5
+ """
6
+
7
+ import logging
8
+ from typing import Dict, Tuple, Optional
9
+ from enum import Enum
10
+ import re
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class RentalModel(Enum):
16
+ """Different rental models"""
17
+ AIRBNB = "airbnb" # Short-stay, host/guest model
18
+ AFRICAN_RENTAL = "african" # Long-term rent, landlord/tenant model
19
+ ROOMMATE = "roommate" # Room sharing in existing space
20
+ MIXED = "mixed" # Both types possible
21
+ UNKNOWN = "unknown"
22
+
23
+
24
+ class UserRole:
25
+ """Handle different user roles across rental models"""
26
+
27
+ # Airbnb roles
28
+ AIRBNB_HOST = "airbnb_host"
29
+ AIRBNB_GUEST = "airbnb_guest"
30
+
31
+ # African rental roles
32
+ LANDLORD = "landlord"
33
+ RENTER = "renter"
34
+ TENANT = "tenant" # Alias for renter
35
+
36
+ # Roommate roles
37
+ HOMEOWNER_SEEKING_ROOMMATE = "homeowner_seeking_roommate" # Has space, looking for roommate
38
+ ROOMMATE_SEEKER = "roommate_seeker" # Looking for a room to share
39
+
40
+ # Generic
41
+ OWNER = "owner"
42
+ BUYER = "buyer"
43
+ SELLER = "seller"
44
+
45
+
46
+ class UserRoleDetector:
47
+ """Intelligently detect user role from context"""
48
+
49
+ def __init__(self):
50
+ # Keywords for role detection
51
+ self.host_keywords = {
52
+ "airbnb": ["host", "hosting", "list my property", "list my place", "rent out", "share"],
53
+ "african": ["landlord", "owner", "property owner", "im renting out", "im listing"]
54
+ }
55
+
56
+ self.guest_keywords = {
57
+ "airbnb": ["guest", "book", "looking for place", "need accommodation", "airbnb"],
58
+ "african": ["renter", "tenant", "looking to rent", "seeking", "want to rent", "im looking for"]
59
+ }
60
+
61
+ self.buyer_keywords = ["buy", "purchase", "for sale", "selling", "acquire"]
62
+ self.seller_keywords = ["sell", "selling", "sale", "list for sale"]
63
+
64
+ # Roommate keywords
65
+ self.homeowner_seeking_roommate_keywords = [
66
+ "looking for a roommate", "need a roommate", "seeking roommate",
67
+ "want to share my", "have a spare room", "room available",
68
+ "looking to share", "share my apartment", "share my house",
69
+ "my place is too big", "extra room", "can share"
70
+ ]
71
+
72
+ self.roommate_seeker_keywords = [
73
+ "looking for a room", "seeking a room", "need a room",
74
+ "looking for roommate", "want to share a place", "room for rent",
75
+ "share accommodation", "shared apartment", "shared house",
76
+ "need accommodation", "looking for a place to share"
77
+ ]
78
+
79
+ logger.info("🔍 User Role Detector initialized")
80
+
81
+ def detect_rental_model(self, user_message: str, location: str = None) -> RentalModel:
82
+ """Detect which rental model user is in"""
83
+
84
+ msg_lower = user_message.lower().strip()
85
+
86
+ # Keywords indicating Airbnb model
87
+ airbnb_indicators = ["airbnb", "short stay", "nightly", "daily", "vacation rental", "host"]
88
+
89
+ # Keywords indicating African rental model
90
+ african_indicators = ["landlord", "tenant", "renter", "monthly rent", "long term", "furnished room"]
91
+
92
+ # Keywords indicating roommate model
93
+ roommate_indicators = ["roommate", "share my", "spare room", "share apartment", "shared house", "share a place"]
94
+
95
+ # Check for explicit indicators
96
+ for indicator in roommate_indicators:
97
+ if indicator in msg_lower:
98
+ logger.info(f"🏘️ Detected roommate model: '{indicator}'")
99
+ return RentalModel.ROOMMATE
100
+
101
+ for indicator in airbnb_indicators:
102
+ if indicator in msg_lower:
103
+ logger.info(f"🏨 Detected Airbnb model: '{indicator}'")
104
+ return RentalModel.AIRBNB
105
+
106
+ for indicator in african_indicators:
107
+ if indicator in msg_lower:
108
+ logger.info(f"🏢 Detected African rental model: '{indicator}'")
109
+ return RentalModel.AFRICAN_RENTAL
110
+
111
+ # Location-based inference (African locations more likely = African model)
112
+ if location:
113
+ african_countries = ["benin", "nigeria", "kenya", "ghana", "south africa", "uganda", "senegal"]
114
+ if any(country in location.lower() for country in african_countries):
115
+ logger.info(f"📍 African location detected: {location}")
116
+ return RentalModel.AFRICAN_RENTAL
117
+
118
+ # Default to mixed
119
+ return RentalModel.MIXED
120
+
121
+ def detect_user_role(self, user_message: str, rental_model: RentalModel = None) -> Tuple[str, float]:
122
+ """
123
+ Detect user role from message
124
+ Returns: (role, confidence)
125
+ """
126
+
127
+ msg_lower = user_message.lower().strip()
128
+
129
+ if rental_model is None:
130
+ rental_model = self.detect_rental_model(user_message)
131
+
132
+ # ==================== SELLER / LANDLORD ====================
133
+
134
+ # Check for explicit landlord/owner language
135
+ landlord_explicit = ["im a landlord", "im the landlord", "i own", "i own this", "as a landlord"]
136
+ for phrase in landlord_explicit:
137
+ if phrase in msg_lower:
138
+ logger.info(f"✅ Explicit landlord detected: '{phrase}'")
139
+ return UserRole.LANDLORD, 0.99
140
+
141
+ # Check for listing/rental language
142
+ if rental_model == RentalModel.AFRICAN_RENTAL:
143
+ landlord_signals = [
144
+ "im listing", "list my", "im renting out", "property for rent",
145
+ "available for rent", "i have a", "i own a"
146
+ ]
147
+ for signal in landlord_signals:
148
+ if signal in msg_lower:
149
+ logger.info(f"🏠 African landlord signal: '{signal}'")
150
+ return UserRole.LANDLORD, 0.90
151
+
152
+ if rental_model == RentalModel.AIRBNB:
153
+ host_signals = ["im hosting", "im a host", "list on airbnb", "airbnb host", "share my place"]
154
+ for signal in host_signals:
155
+ if signal in msg_lower:
156
+ logger.info(f"🏨 Airbnb host signal: '{signal}'")
157
+ return UserRole.AIRBNB_HOST, 0.90
158
+
159
+ # ==================== BUYER / SELLER (SALE) ====================
160
+
161
+ # Explicit sale language
162
+ seller_signals = ["im selling", "for sale", "sell my", "selling property", "list for sale"]
163
+ for signal in seller_signals:
164
+ if signal in msg_lower:
165
+ logger.info(f"💰 Seller detected: '{signal}'")
166
+ return UserRole.SELLER, 0.95
167
+
168
+ buyer_signals = ["want to buy", "looking to purchase", "im buying", "purchase property"]
169
+ for signal in buyer_signals:
170
+ if signal in msg_lower:
171
+ logger.info(f"💳 Buyer detected: '{signal}'")
172
+ return UserRole.BUYER, 0.95
173
+
174
+ # ==================== RENTER / GUEST ====================
175
+
176
+ # Check for explicit renter language
177
+ renter_explicit = ["im a tenant", "im a renter", "im looking to rent", "looking for a place to rent"]
178
+ for phrase in renter_explicit:
179
+ if phrase in msg_lower:
180
+ logger.info(f"✅ Explicit renter/tenant detected: '{phrase}'")
181
+
182
+ if rental_model == RentalModel.AFRICAN_RENTAL:
183
+ return UserRole.TENANT, 0.99
184
+ else:
185
+ return UserRole.AIRBNB_GUEST, 0.99
186
+
187
+ # ==================== ROOMMATE ROLES ====================
188
+
189
+ # Homeowner seeking roommate
190
+ for keyword in self.homeowner_seeking_roommate_keywords:
191
+ if keyword in msg_lower:
192
+ logger.info(f"✅ Homeowner seeking roommate detected: '{keyword}'")
193
+ return UserRole.HOMEOWNER_SEEKING_ROOMMATE, 0.90
194
+
195
+ # Roommate seeker
196
+ for keyword in self.roommate_seeker_keywords:
197
+ if keyword in msg_lower:
198
+ logger.info(f"✅ Roommate seeker detected: '{keyword}'")
199
+ return UserRole.ROOMMATE_SEEKER, 0.90
200
+
201
+ # Guest/renter signals
202
+ if rental_model == RentalModel.AFRICAN_RENTAL:
203
+ renter_signals = [
204
+ "looking for a", "need a", "seeking", "want to rent",
205
+ "im looking for", "show me", "what do you have", "available rooms"
206
+ ]
207
+ for signal in renter_signals:
208
+ if signal in msg_lower:
209
+ logger.info(f"🔍 African renter signal: '{signal}'")
210
+ return UserRole.RENTER, 0.80
211
+
212
+ if rental_model == RentalModel.AIRBNB:
213
+ guest_signals = [
214
+ "looking for accommodation", "need a place", "book",
215
+ "where can i stay", "available places", "show me listings"
216
+ ]
217
+ for signal in guest_signals:
218
+ if signal in msg_lower:
219
+ logger.info(f"🔍 Airbnb guest signal: '{signal}'")
220
+ return UserRole.AIRBNB_GUEST, 0.80
221
+
222
+ logger.warning(f"⚠️ Could not determine user role from: {user_message}")
223
+ return None, 0.0
224
+
225
+ def validate_role_consistency(self, user_role: str, rental_model: RentalModel) -> bool:
226
+ """Validate that role matches rental model"""
227
+
228
+ valid_combinations = {
229
+ RentalModel.AIRBNB: [UserRole.AIRBNB_HOST, UserRole.AIRBNB_GUEST],
230
+ RentalModel.AFRICAN_RENTAL: [UserRole.LANDLORD, UserRole.RENTER, UserRole.TENANT],
231
+ RentalModel.ROOMMATE: [UserRole.HOMEOWNER_SEEKING_ROOMMATE, UserRole.ROOMMATE_SEEKER],
232
+ RentalModel.MIXED: [UserRole.LANDLORD, UserRole.RENTER, UserRole.TENANT,
233
+ UserRole.AIRBNB_HOST, UserRole.AIRBNB_GUEST,
234
+ UserRole.HOMEOWNER_SEEKING_ROOMMATE, UserRole.ROOMMATE_SEEKER],
235
+ }
236
+
237
+ valid = valid_combinations.get(rental_model, [])
238
+
239
+ if user_role in valid:
240
+ logger.info(f"✅ Role {user_role} valid for {rental_model.value}")
241
+ return True
242
+
243
+ logger.warning(f"⚠️ Role {user_role} may not match {rental_model.value}")
244
+ return False
245
+
246
+
247
+ class RoleBasedInferenceEngine:
248
+ """Adapt inference based on user role and rental model"""
249
+
250
+ def __init__(self):
251
+ self.role_detector = UserRoleDetector()
252
+ logger.info("🧠 Role-based Inference Engine initialized")
253
+
254
+ def infer_listing_type(self, state: Dict, user_message: str, rental_model: RentalModel = None) -> Tuple[str, float]:
255
+ """
256
+ Infer listing type based on user role and rental model
257
+
258
+ Returns: (listing_type, confidence)
259
+ """
260
+
261
+ # Detect rental model
262
+ if rental_model is None:
263
+ rental_model = self.role_detector.detect_rental_model(user_message, state.get("location"))
264
+
265
+ # Detect user role
266
+ user_role, role_confidence = self.role_detector.detect_user_role(user_message, rental_model)
267
+
268
+ logger.info(f"🔍 Rental Model: {rental_model.value}")
269
+ logger.info(f"👤 User Role: {user_role} (confidence: {role_confidence:.0%})")
270
+
271
+ # Store in state for later use
272
+ state["rental_model"] = rental_model.value
273
+ state["user_role"] = user_role
274
+
275
+ # ==================== AIRBNB MODEL ====================
276
+
277
+ if rental_model == RentalModel.AIRBNB:
278
+
279
+ # Host listing = short-stay
280
+ if user_role == UserRole.AIRBNB_HOST:
281
+ logger.info("📍 Host → short-stay listing")
282
+ return "short-stay", 0.98
283
+
284
+ # Guest searching = just needs to search
285
+ if user_role == UserRole.AIRBNB_GUEST:
286
+ logger.info("📍 Guest → searching for short-stay")
287
+ return "short-stay", 0.95
288
+
289
+ # ==================== AFRICAN RENTAL MODEL ====================
290
+
291
+ elif rental_model == RentalModel.AFRICAN_RENTAL:
292
+
293
+ # Landlord listing = rent listing
294
+ if user_role in [UserRole.LANDLORD, UserRole.OWNER]:
295
+ logger.info("📍 Landlord → rent listing")
296
+ return "rent", 0.98
297
+
298
+ # Renter/tenant searching = rent listing
299
+ if user_role in [UserRole.RENTER, UserRole.TENANT]:
300
+ logger.info("📍 Tenant/Renter → searching for rent")
301
+ return "rent", 0.95
302
+
303
+ # ==================== ROOMMATE MODEL ====================
304
+
305
+ elif rental_model == RentalModel.ROOMMATE:
306
+
307
+ # Homeowner seeking roommate = roommate listing
308
+ if user_role == UserRole.HOMEOWNER_SEEKING_ROOMMATE:
309
+ logger.info("📍 Homeowner → roommate listing")
310
+ return "roommate", 0.98
311
+
312
+ # Roommate seeker = searching roommate
313
+ if user_role == UserRole.ROOMMATE_SEEKER:
314
+ logger.info("📍 Roommate seeker → searching for roommate")
315
+ return "roommate", 0.95
316
+
317
+ # ==================== SALE MODEL (both) ====================
318
+
319
+ if user_role == UserRole.SELLER:
320
+ logger.info("📍 Seller → sale listing")
321
+ return "sale", 0.98
322
+
323
+ if user_role == UserRole.BUYER:
324
+ logger.info("📍 Buyer → searching for sale")
325
+ return "sale", 0.95
326
+
327
+ # Fallback: check explicit listing_type
328
+ explicit_type = state.get("listing_type")
329
+ if explicit_type:
330
+ logger.info(f"📍 Using explicit listing_type: {explicit_type}")
331
+ return explicit_type, 0.85
332
+
333
+ logger.warning("⚠️ Could not infer listing_type, defaulting to rent")
334
+ return "rent", 0.5
335
+
336
+ def adapt_field_extraction(self, state: Dict, user_message: str) -> Dict:
337
+ """
338
+ Adapt field extraction based on user role and rental model
339
+ """
340
+
341
+ rental_model = self.role_detector.detect_rental_model(user_message, state.get("location"))
342
+ user_role, _ = self.role_detector.detect_user_role(user_message, rental_model)
343
+
344
+ extraction_config = {
345
+ "rental_model": rental_model.value,
346
+ "user_role": user_role,
347
+ "required_fields": [],
348
+ "price_type_suggestions": [],
349
+ "amenity_focus": [],
350
+ "validation_rules": []
351
+ }
352
+
353
+ # ==================== AIRBNB HOST ====================
354
+ if user_role == UserRole.AIRBNB_HOST:
355
+ extraction_config["required_fields"] = [
356
+ "location", "bedrooms", "bathrooms", "price", "amenities"
357
+ ]
358
+ extraction_config["price_type_suggestions"] = ["nightly", "daily", "weekly"]
359
+ extraction_config["amenity_focus"] = ["wifi", "parking", "pool", "kitchen", "ac"]
360
+ extraction_config["validation_rules"] = [
361
+ "price must be per night (nightly/daily)",
362
+ "bedrooms minimum 1",
363
+ "bathrooms can be shared"
364
+ ]
365
+
366
+ # ==================== AIRBNB GUEST ====================
367
+ elif user_role == UserRole.AIRBNB_GUEST:
368
+ extraction_config["required_fields"] = ["location", "check_in", "check_out"]
369
+ extraction_config["price_type_suggestions"] = ["nightly"]
370
+ extraction_config["amenity_focus"] = ["wifi", "kitchen", "parking"]
371
+ extraction_config["validation_rules"] = [
372
+ "check dates for availability",
373
+ "show prices in nightly rates"
374
+ ]
375
+
376
+ # ==================== LANDLORD (African) ====================
377
+ elif user_role == UserRole.LANDLORD:
378
+ extraction_config["required_fields"] = [
379
+ "location", "bedrooms", "bathrooms", "price", "price_type", "furnished"
380
+ ]
381
+ extraction_config["price_type_suggestions"] = ["monthly", "yearly"]
382
+ extraction_config["amenity_focus"] = [
383
+ "furnished", "kitchen", "water", "electricity", "security"
384
+ ]
385
+ extraction_config["validation_rules"] = [
386
+ "price must be monthly or yearly",
387
+ "specify if furnished/unfurnished",
388
+ "include utility info if available",
389
+ "bedrooms and bathrooms required"
390
+ ]
391
+
392
+ # ==================== RENTER/TENANT (African) ====================
393
+ elif user_role in [UserRole.RENTER, UserRole.TENANT]:
394
+ extraction_config["required_fields"] = [
395
+ "location", "budget", "bedrooms", "price_type"
396
+ ]
397
+ extraction_config["price_type_suggestions"] = ["monthly", "yearly"]
398
+ extraction_config["amenity_focus"] = [
399
+ "furnished", "security", "water", "electricity", "parking"
400
+ ]
401
+ extraction_config["validation_rules"] = [
402
+ "show monthly/yearly prices",
403
+ "filter by budget",
404
+ "highlight furnished options",
405
+ "show security features"
406
+ ]
407
+
408
+ # ==================== HOMEOWNER SEEKING ROOMMATE ====================
409
+ elif user_role == UserRole.HOMEOWNER_SEEKING_ROOMMATE:
410
+ extraction_config["required_fields"] = [
411
+ "location", "bedrooms_available", "bathrooms_available", "price", "price_type"
412
+ ]
413
+ extraction_config["price_type_suggestions"] = ["monthly", "yearly"]
414
+ extraction_config["amenity_focus"] = [
415
+ "furnished", "utilities_included", "kitchen_access", "laundry",
416
+ "internet", "parking", "living_room_access"
417
+ ]
418
+ extraction_config["validation_rules"] = [
419
+ "price must be monthly or yearly",
420
+ "specify which rooms are available",
421
+ "describe house/apartment condition",
422
+ "list utilities included",
423
+ "mention house rules"
424
+ ]
425
+
426
+ # ==================== ROOMMATE SEEKER ====================
427
+ elif user_role == UserRole.ROOMMATE_SEEKER:
428
+ extraction_config["required_fields"] = [
429
+ "location", "budget", "move_in_date"
430
+ ]
431
+ extraction_config["price_type_suggestions"] = ["monthly", "yearly"]
432
+ extraction_config["amenity_focus"] = [
433
+ "furnished", "utilities_included", "kitchen_access", "internet",
434
+ "parking", "proximity_to_work"
435
+ ]
436
+ extraction_config["validation_rules"] = [
437
+ "show monthly/yearly prices",
438
+ "filter by budget",
439
+ "check roommate compatibility",
440
+ "show lease terms"
441
+ ]
442
+
443
+ # ==================== SELLER ====================
444
+ elif user_role == UserRole.SELLER:
445
+ extraction_config["required_fields"] = [
446
+ "location", "bedrooms", "bathrooms", "price", "property_type"
447
+ ]
448
+ extraction_config["price_type_suggestions"] = ["fixed"]
449
+ extraction_config["amenity_focus"] = ["land size", "property type", "condition"]
450
+ extraction_config["validation_rules"] = [
451
+ "price is total sale price",
452
+ "property type required (apartment, house, etc)",
453
+ "include land/property size if known"
454
+ ]
455
+
456
+ # ==================== BUYER ====================
457
+ elif user_role == UserRole.BUYER:
458
+ extraction_config["required_fields"] = [
459
+ "location", "budget", "bedrooms", "property_type"
460
+ ]
461
+ extraction_config["price_type_suggestions"] = []
462
+ extraction_config["amenity_focus"] = ["property type", "land size", "condition"]
463
+ extraction_config["validation_rules"] = [
464
+ "show total sale prices",
465
+ "filter by budget range",
466
+ "group by property type"
467
+ ]
468
+
469
+ logger.info(f"✅ Extraction config adapted for {user_role}")
470
+
471
+ return extraction_config
472
+
473
+ def get_role_context_prompt(self, user_role: str, rental_model: str) -> str:
474
+ """Get AI prompt context based on role"""
475
+
476
+ prompts = {
477
+ UserRole.AIRBNB_HOST: """
478
+ You are helping an Airbnb host list their property.
479
+ - Focus on: short-stay rental features, nightly rates, guest amenities
480
+ - Price type: nightly/daily/weekly
481
+ - Emphasize: WiFi, kitchen, parking, cleanliness
482
+ """,
483
+
484
+ UserRole.AIRBNB_GUEST: """
485
+ You are helping someone find an Airbnb accommodation.
486
+ - Focus on: guest experience, amenities, location convenience
487
+ - Price type: show nightly rates
488
+ - Emphasize: cleanliness, safety, host responsiveness
489
+ """,
490
+
491
+ UserRole.LANDLORD: """
492
+ You are helping an African landlord/property owner list a rental.
493
+ - Focus on: long-term rental (monthly/yearly), tenant features, property durability
494
+ - Price type: monthly or yearly
495
+ - Emphasize: furnished/unfurnished, utilities, security, maintenance
496
+ - Include: lease terms, deposit requirements
497
+ """,
498
+
499
+ UserRole.RENTER: """
500
+ You are helping a tenant/renter find an apartment or room.
501
+ - Focus on: long-term rental suitability, affordability, amenities for living
502
+ - Price type: monthly or yearly budget
503
+ - Emphasize: security, utilities included, furnished options, commute
504
+ - Ask about: move-in date, lease length, budget
505
+ """,
506
+
507
+ UserRole.TENANT: """
508
+ You are helping a tenant/renter find an apartment or room.
509
+ - Focus on: long-term rental suitability, affordability, amenities for living
510
+ - Price type: monthly or yearly budget
511
+ - Emphasize: security, utilities included, furnished options, commute
512
+ - Ask about: move-in date, lease length, budget
513
+ """,
514
+
515
+ UserRole.SELLER: """
516
+ You are helping someone sell a property.
517
+ - Focus on: property value, unique features, condition, potential
518
+ - Price type: total sale price
519
+ - Emphasize: location, size, renovations, investment potential
520
+ - Include: property history, legal documents status
521
+ """,
522
+
523
+ UserRole.BUYER: """
524
+ You are helping someone find and purchase a property.
525
+ - Focus on: property value, investment potential, location
526
+ - Price type: show total purchase price
527
+ - Emphasize: property condition, neighborhood, future value
528
+ - Include: financing options, inspection recommendations
529
+ """,
530
+
531
+ UserRole.HOMEOWNER_SEEKING_ROOMMATE: """
532
+ You are helping someone find a roommate to share their home with.
533
+ - Focus on: compatibility, house/apartment details, shared spaces
534
+ - Price type: monthly or yearly
535
+ - Emphasize: house rules, utilities included, available rooms, amenities
536
+ - Include: lease terms, deposit, move-in date, roommate preferences
537
+ - Ask about: their lifestyle, work schedule, cleanliness standards
538
+ """,
539
+
540
+ UserRole.ROOMMATE_SEEKER: """
541
+ You are helping someone find a room to share with a roommate.
542
+ - Focus on: affordability, roommate compatibility, location, utilities
543
+ - Price type: monthly or yearly budget
544
+ - Emphasize: house rules, amenities, commute, lifestyle fit
545
+ - Include: move-in date, lease length, deposit requirements
546
+ - Ask about: budget, preferred location, work/study location, lifestyle
547
+ """
548
+ }
549
+
550
+ return prompts.get(user_role, "")
551
+
552
+
553
+ # ==================== EXAMPLE USAGE ====================
554
+
555
+ if __name__ == "__main__":
556
+ logging.basicConfig(level=logging.INFO)
557
+
558
+ engine = RoleBasedInferenceEngine()
559
+
560
+ # Test cases
561
+ test_cases = [
562
+ # Airbnb host
563
+ ("I'm a host on Airbnb and want to list my apartment in Lagos", "Lagos"),
564
+
565
+ # Airbnb guest
566
+ ("I'm looking for accommodation on Airbnb in Accra next week", "Accra"),
567
+
568
+ # African landlord
569
+ ("I'm a landlord in Cotonou with a 2-bedroom apartment for monthly rent", "Cotonou"),
570
+
571
+ # African tenant
572
+ ("I'm looking to rent a furnished room in Nairobi, my budget is 30000 KES per month", "Nairobi"),
573
+
574
+ # Homeowner seeking roommate
575
+ ("My house in Lagos is too big for just me. I have 2 extra bedrooms and want to share", "Lagos"),
576
+
577
+ # Roommate seeker
578
+ ("I'm looking for a room to share in Accra, somewhere near my workplace", "Accra"),
579
+
580
+ # Seller
581
+ ("I want to sell my house in Lagos for 50 million NGN", "Lagos"),
582
+
583
+ # Buyer
584
+ ("I'm looking to buy a 3-bedroom apartment in Cape Town", "Cape Town"),
585
+ ]
586
+
587
+ print("\n" + "="*70)
588
+ print("🧠 ROLE-BASED INFERENCE ENGINE TEST")
589
+ print("="*70 + "\n")
590
+
591
+ for message, location in test_cases:
592
+ print(f"📝 Message: {message}")
593
+ print(f"📍 Location: {location}\n")
594
+
595
+ state = {"location": location}
596
+ listing_type, confidence = engine.infer_listing_type(state, message)
597
+
598
+ print(f"✅ Listing Type: {listing_type} (confidence: {confidence:.0%})")
599
+
600
+ config = engine.adapt_field_extraction(state, message)
601
+ print(f"📋 Required fields: {', '.join(config['required_fields'])}")
602
+ print(f"💰 Price types: {', '.join(config['price_type_suggestions'])}")
603
+
604
+ prompt = engine.get_role_context_prompt(config['user_role'], config['rental_model'])
605
+ print(f"🎯 AI Context:\n{prompt}")
606
+
607
+ print("-" * 70 + "\n")
app/ml/trainning/__init__.py CHANGED
@@ -0,0 +1 @@
1
+ """ML training module"""
app/ml/trainning/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (223 Bytes). View file
 
app/ml/trainning/__pycache__/hf_dataset_downloader.cpython-313.pyc ADDED
Binary file (11.6 kB). View file
 
app/ml/trainning/__pycache__/train_enhanced_model.cpython-313.pyc ADDED
Binary file (7.45 kB). View file
 
app/ml/trainning/hf_dataset_downloader.py ADDED
@@ -0,0 +1,179 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ hf_dataset_downloader.py – bullet-proof, real-tabular real-estate data
4
+ Run: python scripts/download_training_data.py
5
+ """
6
+
7
+ import os, json, logging, pandas as pd, numpy as np
8
+ from typing import List, Tuple, Optional
9
+
10
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(message)s")
11
+ log = logging.getLogger(__name__)
12
+
13
+ CACHE = "./hf_datasets"
14
+ OUT = "app/ml/models"
15
+
16
+ # ------------------------------------------------ one working source ---
17
+ SOURCES: List[Tuple[str, str, str]] = [
18
+ ("california_housing", "csv", "https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv"),
19
+ ]
20
+
21
+ COL_MAP = {
22
+ "price": ["price", "median_house_value", "sale_price", "cost"],
23
+ "bedrooms": ["bedrooms", "total_bedrooms", "beds"],
24
+ "bathrooms": ["bathrooms", "total_bathrooms", "baths"],
25
+ "location": ["location", "ocean_proximity", "city", "address", "region"],
26
+ }
27
+
28
+ # -----------------------------------------------------------------------
29
+ class HuggingFaceDatasetDownloader:
30
+ def __init__(self, cache_dir: str = CACHE, output_dir: str = OUT):
31
+ self.cache_dir = cache_dir
32
+ self.output_dir = output_dir
33
+ os.makedirs(cache_dir, exist_ok=True)
34
+ os.makedirs(output_dir, exist_ok=True)
35
+
36
+ # ------------ fetch -------------------------------------------------
37
+ def _get_csv(self, url: str) -> Optional[pd.DataFrame]:
38
+ try:
39
+ df = pd.read_csv(url)
40
+ log.info("CSV rows=%d url=%.60s", len(df), url)
41
+ return df
42
+ except Exception as e:
43
+ log.error("CSV fail: %s", e)
44
+ return None
45
+
46
+ # ------------ normalise ---------------------------------------------
47
+ def _normalise(self, df: pd.DataFrame, name: str) -> Optional[pd.DataFrame]:
48
+ log.info("Normalising %s …", name)
49
+ print(f"\n=== {name} COLUMNS ===\n{list(df.columns)}\n{df.head(2)}")
50
+
51
+ out = pd.DataFrame()
52
+ for std, variants in COL_MAP.items():
53
+ for v in variants:
54
+ if v in df.columns:
55
+ out[std] = df[v]
56
+ log.info(" mapped %s → %s", v, std)
57
+ break
58
+
59
+ # price is mandatory
60
+ if out.get("price") is None:
61
+ log.warning("No price column in %s", name)
62
+ return None
63
+ out["price"] = pd.to_numeric(out["price"], errors="coerce").dropna()
64
+ if out["price"].empty:
65
+ log.warning("Price column empty after coerce")
66
+ return None
67
+
68
+ # safe fall-backs (Series, not scalars)
69
+ if "location" not in out.columns:
70
+ out["location"] = "Unknown"
71
+ out["location"] = out["location"].fillna("Unknown")
72
+
73
+ # bedrooms / bathrooms – create Series first, then fillna
74
+ bedrooms_series = pd.to_numeric(out.get("bedrooms", 2), errors="coerce")
75
+ bathrooms_series = pd.to_numeric(out.get("bathrooms", 1), errors="coerce")
76
+
77
+ out["bedrooms"] = bedrooms_series.fillna(2).astype(int)
78
+ out["bathrooms"] = bathrooms_series.fillna(1).astype(int)
79
+
80
+ out["dataset_source"] = name
81
+ out = out.dropna(subset=["price"])
82
+ log.info("Normalised → %d rows", len(out))
83
+ return out
84
+
85
+ # ------------ combine ----------------------------------------------
86
+ def _combine(self, frames: List[Tuple[str, pd.DataFrame]]) -> pd.DataFrame:
87
+ cleaned = [self._normalise(df, name) for name, df in frames if df is not None]
88
+ cleaned = [c for c in cleaned if c is not None and len(c)]
89
+ if not cleaned:
90
+ log.warning("No valid frames – creating minimal dummy so training can run")
91
+ dummy = pd.DataFrame({
92
+ "price": [250_000, 350_000, 450_000],
93
+ "bedrooms": [2, 3, 4],
94
+ "bathrooms": [1, 2, 3],
95
+ "location": ["Unknown", "Unknown", "Unknown"],
96
+ "dataset_source": ["dummy"] * 3,
97
+ })
98
+ return dummy
99
+ final = pd.concat(cleaned, ignore_index=True)
100
+ log.info("Combined → %d rows", len(final))
101
+ return final
102
+
103
+ # ------------ augment ----------------------------------------------
104
+ def _augment(self, df: pd.DataFrame) -> pd.DataFrame:
105
+ log.info("Augmenting …")
106
+ df["price_per_bedroom"] = df["price"] / df["bedrooms"].clip(1)
107
+ df["price_per_bathroom"] = df["price"] / df["bathrooms"].clip(1)
108
+ df["total_rooms"] = df["bedrooms"] + df["bathrooms"]
109
+ df["price_range"] = pd.cut(
110
+ df["price"],
111
+ bins=[0, 200_000, 500_000, 1_000_000, np.inf],
112
+ labels=["low", "mid", "high", "luxury"],
113
+ )
114
+ return df
115
+
116
+ # ------------ save --------------------------------------------------
117
+ def _save(self, df: pd.DataFrame, basename: str = "combined_training_data"):
118
+ csv = f"{self.output_dir}/{basename}.csv"
119
+ parq = f"{self.output_dir}/{basename}.parquet"
120
+ jsnl = f"{self.output_dir}/{basename}.jsonl"
121
+
122
+ df.to_csv(csv, index=False)
123
+ df.to_parquet(parq, index=False)
124
+ with open(jsnl, "w", encoding="utf-8") as f:
125
+ for rec in df.to_dict(orient="records"):
126
+ f.write(json.dumps(rec, default=str) + "\n")
127
+
128
+ log.info("Saved CSV : %s", csv)
129
+ log.info("Saved Parq : %s", parq)
130
+ log.info("Saved JSONL: %s", jsnl)
131
+ return {"csv": csv, "parquet": parq, "jsonl": jsnl}
132
+
133
+ # ------------ stats -------------------------------------------------
134
+ def _save_info(self, df: pd.DataFrame):
135
+ info = {
136
+ "rows": len(df),
137
+ "cols": list(df.columns),
138
+ "price": {"mean": float(df["price"].mean()), "median": float(df["price"].median()),
139
+ "min": float(df["price"].min()), "max": float(df["price"].max())},
140
+ "bedrooms": {"mean": float(df["bedrooms"].mean()), "min": int(df["bedrooms"].min()), "max": int(df["bedrooms"].max())},
141
+ "bathrooms": {"mean": float(df["bathrooms"].mean()), "min": int(df["bathrooms"].min()), "max": int(df["bathrooms"].max())},
142
+ "locations": df["location"].nunique(),
143
+ }
144
+ path = f"{self.output_dir}/dataset_info.json"
145
+ with open(path, "w") as f:
146
+ json.dump(info, f, indent=2)
147
+ log.info("Stats → %s", path)
148
+
149
+ # ------------ main --------------------------------------------------
150
+ def download_and_prepare(self) -> pd.DataFrame:
151
+ print("\n" + "=" * 80)
152
+ print("REAL-ESTATE TABULAR DOWNLOADER (BULLET-PROOF)")
153
+ print("=" * 80 + "\n")
154
+
155
+ raw: List[Tuple[str, pd.DataFrame]] = []
156
+ for name, typ, src in SOURCES:
157
+ log.info("Getting %s …", name)
158
+ if typ == "csv":
159
+ raw.append((name, self._get_csv(src)))
160
+ else:
161
+ log.warning("Unknown type %s", typ)
162
+
163
+ raw = [(n, d) for n, d in raw if d is not None]
164
+ combined = self._combine(raw)
165
+ combined = self._augment(combined)
166
+ paths = self._save(combined)
167
+ self._save_info(combined)
168
+
169
+ print("\n" + "=" * 80)
170
+ print("DOWNLOAD COMPLETE")
171
+ print("=" * 80)
172
+ print(f"Rows : {len(combined):,}")
173
+ print(f"Files: {paths}")
174
+ print("\nNext → python scripts/train_models.py\n")
175
+ return combined
176
+
177
+
178
+ if __name__ == "__main__":
179
+ HuggingFaceDatasetDownloader().download_and_prepare()
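A minimal sketch for sanity-checking the artefacts written by _save() and _save_info() before moving on to training; it assumes the default output directory app/ml/models (the same path DATA_FILE points at in the trainer below) and is illustrative only, not part of this commit.

import json
import pandas as pd

# reload the combined dataset and eyeball the engineered columns
df = pd.read_parquet("app/ml/models/combined_training_data.parquet")
print(df[["price", "bedrooms", "bathrooms", "location", "price_range"]].head())
print(df["dataset_source"].value_counts())

# the summary statistics written by _save_info()
with open("app/ml/models/dataset_info.json") as f:
    print(json.dumps(json.load(f), indent=2))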
app/ml/trainning/train_enhanced_model.py ADDED
@@ -0,0 +1,123 @@
+ #!/usr/bin/env python3
+ """
+ train_enhanced_model.py – train on real tabular data
+ Run: python scripts/train_models.py
+ """
+
+ import json, os, joblib, logging, pandas as pd, numpy as np
+ from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import r2_score, mean_absolute_error, accuracy_score
+ from sentence_transformers import SentenceTransformer
+
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(message)s")
+ log = logging.getLogger(__name__)
+
+ MODEL_DIR = "app/ml/models"
+ DATA_FILE = f"{MODEL_DIR}/combined_training_data.parquet"
+ MODEL_PATH = f"{MODEL_DIR}/field_models.pkl"
+
+
+ class EnhancedModelTrainer:
+     def __init__(self, model_dir: str = MODEL_DIR, data_file: str = DATA_FILE):
+         self.model_dir = model_dir
+         self.data_file = data_file
+         os.makedirs(model_dir, exist_ok=True)
+
+     # ------------ load ----------------------------------------------
+     def _load(self) -> pd.DataFrame | None:
+         if not os.path.exists(self.data_file):
+             log.error("Data not found → run download_training_data.py first")
+             return None
+         df = pd.read_parquet(self.data_file)
+         log.info("Loaded %d rows", len(df))
+         return df
+
+     # ------------ prep ----------------------------------------------
+     def _prep(self, df: pd.DataFrame):
+         log.info("Preparing features …")
+         X_num = df[["bedrooms", "bathrooms", "price_per_bedroom", "total_rooms"]].fillna(0)
+         self.scaler = StandardScaler().fit(X_num)
+
+         # embeddings from location string
+         self.embedder = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="./models")
+         X_text = self.embedder.encode(df["location"].astype(str).tolist())
+
+         X = np.hstack([self.scaler.transform(X_num), X_text])
+         y_price = df["price"].values
+         y_loc = LabelEncoder().fit_transform(df["location"])
+         return X, y_price, y_loc
+
+     # ------------ train ---------------------------------------------
+     def _train(self, X, y_price, y_loc, df: pd.DataFrame):
+         log.info("Training models …")
+         X_train, X_test, yp_train, yp_test, yl_train, yl_test = train_test_split(
+             X, y_price, y_loc, test_size=0.2, random_state=42
+         )
+
+         # 1. price regressor
+         price_model = RandomForestRegressor(
+             n_estimators=300, max_depth=20, random_state=42, n_jobs=-1
+         )
+         price_model.fit(X_train, yp_train)
+         y_pred = price_model.predict(X_test)
+         log.info("Price R² = %.3f MAE = $%.0f", r2_score(yp_test, y_pred), mean_absolute_error(yp_test, y_pred))
+
+         # 2. location classifier
+         loc_model = RandomForestClassifier(
+             n_estimators=300, max_depth=None, random_state=42, n_jobs=-1
+         )
+         loc_model.fit(X_train, yl_train)
+         log.info("Location accuracy = %.2f%%", 100 * accuracy_score(yl_test, loc_model.predict(X_test)))
+
+         # 3. price patterns
+         patterns = (
+             df.groupby("location")["price"]
+             .agg(["mean", "median", "std", "min", "max", "count"])
+             .round(0)
+             .to_dict(orient="index")
+         )
+
+         location_encoder = LabelEncoder().fit(df["location"])
+         return {
+             "price_model": price_model,
+             "location_model": loc_model,
+             "location_encoder": location_encoder,
+             "price_patterns": patterns,
+             "scaler": self.scaler,
+             "embedder": self.embedder,
+         }
+
+     # ------------ save ----------------------------------------------
+     def _save(self, bundle):
+         joblib.dump(bundle, MODEL_PATH)
+         log.info("Saved model bundle → %s", MODEL_PATH)
+
+     # ------------ report --------------------------------------------
+     def _report(self, df: pd.DataFrame):
+         print("\n📊 TRAINING REPORT")
+         print(f"Rows trained : {len(df):,}")
+         print(f"Locations : {df['location'].nunique()}")
+         print(f"Avg price : ${df['price'].mean():,.0f}")
+         print(f"Price range : ${df['price'].min():,.0f} – ${df['price'].max():,.0f}")
+         print(f"Models saved : {MODEL_PATH}\n")
+
+     # ------------ pipeline ------------------------------------------
+     def train(self):
+         print("\n" + "=" * 70)
+         print("🚀 ENHANCED MODEL TRAINING (REAL DATA)")
+         print("=" * 70 + "\n")
+
+         df = self._load()
+         if df is None:
+             return
+         X, y_price, y_loc = self._prep(df)
+         bundle = self._train(X, y_price, y_loc, df)
+         self._save(bundle)
+         self._report(df)
+
+         print("✅ Training complete – run test_ml_model.py to verify\n")
+
+
+ if __name__ == "__main__":
+     EnhancedModelTrainer().train()
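A minimal sketch of reloading the saved bundle for later use; the key names follow the dict returned by _train() above, and the path assumes MODEL_PATH as defined in this file. Illustrative only, not part of this commit.

import joblib

bundle = joblib.load("app/ml/models/field_models.pkl")

# per-location price statistics computed in _train()
patterns = bundle["price_patterns"]
print(list(patterns.items())[:3])

# the fitted estimators and preprocessors travel in the same bundle
price_model = bundle["price_model"]
scaler = bundle["scaler"]
embedder = bundle["embedder"]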
models/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/added_tokens.json ADDED
File without changes
models/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/chat_template.jinja ADDED
File without changes
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db
+ size 90868376
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/58d4a9a45664eb9e12de9549c548c09b6134c17f ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7dfc82496ec33f906b5b0d6750c1e2397da6530c74d1ae3568c55bc2739125e7
+ size 10454
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/59d594003bf59880a884c574bf88ef7555bb0202 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc1993fde0a95c24ec6c022539d41cf6e2f7c9721e5415d6fb6897472a9cd4b7
+ size 53
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/72b987fd805cfa2b58c4c8c952b274a11bfd5a00 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:953f9c0d463486b10a6871cc2fd59f223b2c70184f49815e7efbcab5d8908b41
+ size 612
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/952a9b81c0bfd99800fabf352f69c7ccd46c5e43 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:84e40c8e006c9b1d6c122e02cba9b02458120b5fb0c87b746c41e0207cf642cf
+ size 349
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/c79f2b6a0cea6f4b564fed1938984bace9d30ff0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:acb92769e8195aabd29b7b2137a9e6d6e25c476a4f15aa4355c233426c61576b
+ size 350
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/cb202bfe2e3c98645018a6d12f182a434c9d3e02 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be50c3628f2bf5bb5e3a7f17b1f74611b2561a3a27eeab05e5aa30f411572037
+ size 466247
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/d1514c3162bbe87b343f565fadc62e6c06f04f03 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4be450dde3b0273bb9787637cfbd28fe04a7ba6ab9d36ac48e92b11e350ffc23
+ size 190
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:303df45a03609e4ead04bc3dc1536d0ab19b5358db685b6f3da123d05ec200e3
+ size 112
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fb140275c155a9c7c5a3b3e0e77a9e839594a938 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:07eced375cec144d27c900241f3e339478dec958f92fddbc551f295c992038a3
+ size 231508
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fd1b291129c607e5d49799f87cb219b27f98acdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:061ca9d39661d6c6d6de5ba27f79a1cd5770ea247f8d46412a68a498dc5ac9f3
+ size 116
models/models--sentence-transformers--all-MiniLM-L6-v2/refs/main ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:edb692c300dcf5dd2a56116221d7613e9d360b6f0be22bc3e02e040e095b135b
+ size 40
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json ADDED
@@ -0,0 +1 @@
+ ../../../blobs/d1514c3162bbe87b343f565fadc62e6c06f04f03
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/README.md ADDED
@@ -0,0 +1 @@
+ ../../blobs/58d4a9a45664eb9e12de9549c548c09b6134c17f
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/72b987fd805cfa2b58c4c8c952b274a11bfd5a00
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config_sentence_transformers.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/fd1b291129c607e5d49799f87cb219b27f98acdf
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/model.safetensors ADDED
@@ -0,0 +1 @@
+ ../../blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/952a9b81c0bfd99800fabf352f69c7ccd46c5e43
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/sentence_bert_config.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/59d594003bf59880a884c574bf88ef7555bb0202
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/cb202bfe2e3c98645018a6d12f182a434c9d3e02
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/c79f2b6a0cea6f4b564fed1938984bace9d30ff0
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/vocab.txt ADDED
@@ -0,0 +1 @@
+ ../../blobs/fb140275c155a9c7c5a3b3e0e77a9e839594a938
scripts/download_training_data.py ADDED
@@ -0,0 +1,5 @@
+ #!/usr/bin/env python3
+ import sys, os
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+ from app.ml.trainning.hf_dataset_downloader import HuggingFaceDatasetDownloader
+ HuggingFaceDatasetDownloader().download_and_prepare()
scripts/train_models.py ADDED
@@ -0,0 +1,5 @@
+ #!/usr/bin/env python3
+ import sys, os
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+ from app.ml.trainning.train_enhanced_model import EnhancedModelTrainer
+ EnhancedModelTrainer().train()
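Run in order, the two wrapper scripts reproduce the pipeline the downloader's final message points to: prepare the tabular data, then train and persist the model bundle. A hedged programmatic equivalent, assuming the project root is on sys.path:

from app.ml.trainning.hf_dataset_downloader import HuggingFaceDatasetDownloader
from app.ml.trainning.train_enhanced_model import EnhancedModelTrainer

HuggingFaceDatasetDownloader().download_and_prepare()  # writes app/ml/models/combined_training_data.*
EnhancedModelTrainer().train()                         # writes app/ml/models/field_models.pkl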