Commit · ff21ae5
1 Parent(s): 9f3c354
tozo

Files changed:
- app/__pycache__/__init__.cpython-313.pyc +0 -0
- app/ai/nodes/intent_node.py +120 -56
- app/ai/prompts/system_prompt.txt +88 -137
- app/ai/routes/chat.py +64 -24
- app/ai/services/dynamic_role_manager.py +299 -0
- app/ml/__pycache__/__init__.cpython-313.pyc +0 -0
- app/ml/models/combined_training_data.csv +4 -0
- app/ml/models/combined_training_data.jsonl +3 -0
- app/ml/models/combined_training_data.parquet +3 -0
- app/ml/models/dataset_info.json +31 -0
- app/ml/models/field_models.pkl +3 -0
- app/ml/models/ml_listing_extractor.py +60 -59
- app/ml/models/user_role_context_handler.py +607 -0
- app/ml/trainning/__init__.py +1 -0
- app/ml/trainning/__pycache__/__init__.cpython-313.pyc +0 -0
- app/ml/trainning/__pycache__/hf_dataset_downloader.cpython-313.pyc +0 -0
- app/ml/trainning/__pycache__/train_enhanced_model.cpython-313.pyc +0 -0
- app/ml/trainning/hf_dataset_downloader.py +179 -0
- app/ml/trainning/train_enhanced_model.py +123 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/added_tokens.json +0 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/chat_template.jinja +0 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/58d4a9a45664eb9e12de9549c548c09b6134c17f +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/59d594003bf59880a884c574bf88ef7555bb0202 +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/72b987fd805cfa2b58c4c8c952b274a11bfd5a00 +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/952a9b81c0bfd99800fabf352f69c7ccd46c5e43 +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/c79f2b6a0cea6f4b564fed1938984bace9d30ff0 +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/cb202bfe2e3c98645018a6d12f182a434c9d3e02 +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/d1514c3162bbe87b343f565fadc62e6c06f04f03 +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5 +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fb140275c155a9c7c5a3b3e0e77a9e839594a938 +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fd1b291129c607e5d49799f87cb219b27f98acdf +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/refs/main +3 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json +1 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/README.md +1 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config.json +1 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config_sentence_transformers.json +1 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/model.safetensors +1 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json +1 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/sentence_bert_config.json +1 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/special_tokens_map.json +1 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer.json +1 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer_config.json +1 -0
- models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/vocab.txt +1 -0
- scripts/download_training_data.py +5 -0
- scripts/train_models.py +5 -0
app/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (278 Bytes)
app/ai/nodes/intent_node.py
CHANGED
@@ -1,4 +1,4 @@
-# app/ai/nodes/intent_node.py
+# app/ai/nodes/intent_node.py - WITH DYNAMIC ROLE SYSTEM
 import json
 import re
 from typing import Dict, List

@@ -15,6 +15,7 @@ from app.core.error_handling import (
 )
 from app.core.observability import get_token_tracker
 from app.ml.models.ml_listing_extractor import get_ml_extractor
+from app.ai.services.dynamic_role_manager import get_dynamic_role_manager
 from app.ai.nodes.draft_node import (
     _generate_title,
     _generate_description,

@@ -27,6 +28,22 @@ MAX_TOKENS = 600
 TEMP = 0

 ml_extractor = get_ml_extractor()
+role_manager = get_dynamic_role_manager()
+
+# Import Role-Based Inference Engine
+try:
+    from app.ml.models.user_role_context_handler import (
+        RoleBasedInferenceEngine,
+        UserRoleDetector,
+    )
+    role_engine = RoleBasedInferenceEngine()
+    role_detector = UserRoleDetector()
+    logger.info("Role-based inference engine loaded")
+except Exception as e:
+    logger.warning(f"Role-based inference not available: {e}")
+    role_engine = None
+    role_detector = None

 # ============================================================
 # Helpers

@@ -38,7 +55,7 @@ def _load_system() -> str:
         with open("app/ai/prompts/system_prompt.txt", encoding="utf-8") as f:
             return f.read()
     except FileNotFoundError:
-        logger.error("
+        logger.error("System prompt file not found")
         return "You are Aida, a helpful AI assistant."

 SYSTEM_PROMPT = _load_system()

@@ -160,90 +177,105 @@ def _build_draft_preview(data: dict) -> dict:
         "field_confidences": data.get("field_validations", {}),
     }

+def infer_listing_type(state: Dict, user_role: str = None, user_message: str = None) -> tuple:
+    """Intelligently infer listing_type"""
+
+    explicit_type = state.get("listing_type")
+    price_type = state.get("price_type")
+
+    # 1. If explicitly stated, use it
+    if explicit_type and explicit_type in ["rent", "short-stay", "sale", "roommate"]:
+        logger.info(f"Listing type explicit: {explicit_type}")
+        return explicit_type, 0.99
+
+    # 2. Infer from price_type
+    if price_type:
+        price_type_lower = price_type.lower().strip()
+
+        # Short-stay indicators
+        if price_type_lower in ["nightly", "daily", "weekly", "night", "day", "week"]:
+            logger.info(f"Inferred short-stay from price_type: {price_type}")
+            return "short-stay", 0.95
+
+        # Monthly/Yearly = RENT
+        elif price_type_lower in ["monthly", "yearly", "month", "year", "mth", "yr"]:
+            logger.info(f"Inferred rent from price_type: {price_type}")
+            return "rent", 0.95
+
+    # 3. Default to rent
+    logger.warning("Could not infer listing_type, defaulting to rent")
+    return "rent", 0.5
+
+
 # ============================================================
-# Intent Node
+# Intent Node with Dynamic Role
 # ============================================================

 @async_retry(strategy=RetryStrategy.MODERATE, operation_name="intent_node")
 async def intent_node(state: Dict) -> Dict:
-    """
-    LangGraph node: Extract and route user intent
-
-    Features:
-    - Command detection when preview active
-    - Smart LLM routing with auto-fallback
-    - Context window management
-    - ML validation and inference
-    - Full error handling and observability
-    """
+    """LangGraph node: Extract intent with DYNAMIC ROLE ASSIGNMENT"""

     current_msg = _get_current_message(state).lower()
     status = state.get("status")
+    base_user_role = state.get("user_role", "renter")  # Original signup role

     with trace_operation(
         "intent_node",
         {
             "status": status,
+            "base_role": base_user_role,
             "has_draft": state.get("draft_preview") is not None,
         }
     ):
+        # Handle commands FIRST when preview is active
         if status in {"preview_shown", "waiting_for_images"} and state.get("draft_preview"):
-            logger.info(f"
+            logger.info(f"COMMAND DETECTION MODE: status={status}")

-            # PUBLISH command
             if any(w in current_msg for w in {"publish", "go live", "confirm", "yes", "ok"}):
-                logger.info("
+                logger.info("COMMAND: publish")
                 state["intent"] = "publish"
                 state["ai_reply"] = ""
                 return state

-            # EDIT command
             if "edit" in current_msg or "change" in current_msg or "update" in current_msg:
                 field = current_msg.replace("edit", "").replace("change", "").replace("update", "").strip()
+                logger.info(f"COMMAND: edit field='{field}'")
                 state["status"] = "collecting"
                 state["missing_fields"] = [field] if field else ["location"]
                 state["ai_reply"] = f"Sure! What would you like to change for **{field}**?"
                 return state

-            # DISCARD command
             if any(w in current_msg for w in {"discard", "cancel", "delete", "no"}):
-                logger.info("
+                logger.info("COMMAND: discard")
                 state["status"] = None
                 state["draft_preview"] = None
                 state["intent"] = None
                 state["ai_reply"] = "Draft discarded. Let me know if you want to list another property!"
                 return state

-            logger.info("💬 COMMAND: casual chat")
+            logger.info("COMMAND: casual chat")
             state["ai_reply"] = "Say **publish** to list, **edit** to change, or **discard** to start over."
             return state

+        # Skip if status is published or error
         if state.get("status") in ["published", "error"]:
-            logger.info(f"
+            logger.info(f"Skipping intent_node, status={state.get('status')}")
             return state

-        user_role = state["user_role"]
         user_id = state.get("user_id")
         human_msg = state["messages"][-1]["content"]

+        # LLM CALL
         with trace_operation("llm_call_with_routing"):
             try:
-                # Manage context
                 context_mgr = get_context_manager()
                 messages = await context_mgr.manage_context([
-                    {"role": "system", "content": SYSTEM_PROMPT.replace("{user_role}",
+                    {"role": "system", "content": SYSTEM_PROMPT.replace("{user_role}", base_user_role)},
                     {"role": "user", "content": human_msg},
                 ])

-                logger.info(
+                logger.info("Calling LLM with smart routing")

-                # Call LLM with smart routing
                 text, model_used, usage = await call_llm_smart(
                     messages,
                     intent=state.get("intent"),

@@ -251,7 +283,6 @@ async def intent_node(state: Dict) -> Dict:
                     max_tokens=MAX_TOKENS,
                 )

-                # Track tokens
                 tracker = get_token_tracker()
                 tracker.record_tokens(
                     model_used,

@@ -260,7 +291,7 @@
                 )

                 logger.info(
-                    f"
+                    f"LLM response from {model_used}",
                     extra={
                         "tokens": usage.get("total_tokens", 0),
                         "duration_ms": usage.get("duration_ms", 0),

@@ -270,26 +301,50 @@
                 raw = text

             except LLMError as e:
-                logger.error(f"
+                logger.error(f"LLM error: {e.message}")
                 state["ai_reply"] = "Sorry, I'm having trouble. Please try again."
                 state["status"] = "error"
                 return state

             except Exception as e:
-                logger.error(f"
+                logger.error(f"Unexpected LLM error: {e}", exc_info=True)
                 raise

+        # Parse JSON
         try:
             cleaned = _clean_json(raw)
             data = json.loads(cleaned)
         except json.JSONDecodeError as e:
-            logger.error(f"
+            logger.error(f"Invalid JSON response: {raw[:100]}")
             data = {"allowed": False, "ai_reply": "Sorry, I didn't understand that. Could you rephrase?"}

+        # Handle LISTING INTENT with DYNAMIC ROLE
         if data.get("intent") == "list":
             data["allowed"] = True
+            listing_type = _normalize_listing_type(data.get("listing_type"))
+
+            # NEW: Get dynamic role based on listing type
+            dynamic_role, role_desc, role_conf = role_manager.get_dynamic_role_for_listing(
+                base_user_role,
+                listing_type
+            )
+
+            if dynamic_role is None:
+                # User not allowed to create this type of listing
+                logger.warning(f"User {base_user_role} cannot create {listing_type} listing")
+                data["allowed"] = False
+                data["ai_reply"] = role_desc
+                state.update(
+                    allowed=False,
+                    ai_reply=data["ai_reply"],
+                    status="error"
+                )
+                return state
+
+            # Store dynamic role in state
+            state["dynamic_role"] = dynamic_role
+            logger.info(f"Dynamic role assigned: {dynamic_role} ({role_desc})")
+
             missing = _get_missing_fields(data)

             if missing:

@@ -301,56 +356,63 @@ async def intent_node(state: Dict) -> Dict:
                 data["missing_fields"] = []
                 data["draft_preview"] = _build_draft_preview(data)
                 data["ai_reply"] = "Perfect! Let me prepare your listing draft..."
-                logger.info("
+                logger.info("All required fields complete")

+        # Handle SEARCH INTENT with DYNAMIC ROLE
         if data.get("intent") == "search":
             data["allowed"] = True
+
+            # NEW: Get dynamic role for search
+            dynamic_role, role_desc, role_conf = role_manager.get_dynamic_role_for_search(
+                base_user_role,
+                data.get("listing_type")
+            )
+
+            # Store dynamic role
+            state["dynamic_role"] = dynamic_role
+            logger.info(f"Dynamic role assigned: {dynamic_role} ({role_desc})")

+        # Normalize values
         location = _normalize_locations(data.get("location"))
         amenities = _normalize_amenities(data.get("amenities", []))
         price_type = _normalize_price_type(data.get("price_type"))
         listing_type = _normalize_listing_type(data.get("listing_type"))

+        # ML INFERENCE
         if data.get("intent") == "list":
             with trace_operation("ml_processing"):
-                # Extract location
                 if data.get("location"):
                     try:
                         city, loc_info = await ml_extractor.extract_location_from_address(data["location"])
                         if city:
                             data["location"] = city
                             data["location_details"] = loc_info
-                            logger.info(f"
+                            logger.info(f"Location extracted: {data['location']}")
                     except Exception as e:
-                        logger.warning(f"
+                        logger.warning(f"Location extraction failed: {e}")

-                # Infer listing type
                 try:
-                    lt, conf =
-                        data, user_role=
+                    lt, conf = infer_listing_type(
+                        data, user_role=state.get("dynamic_role"), user_message=human_msg
                     )
                     if lt:
                         data["listing_type"] = lt
                         data["listing_confidence"] = conf
-                        logger.info(f"
+                        logger.info(f"Listing type inferred: {lt}")
                 except Exception as e:
-                    logger.warning(f"
+                    logger.warning(f"Listing type inference failed: {e}")

-                # Infer currency
                 try:
                     currency, city, conf = await ml_extractor.infer_currency(data)
                     if currency:
                         data["currency"] = currency
                         data["currency_confidence"] = conf
-                        logger.info(f"
+                        logger.info(f"Currency inferred: {currency}")
                 except Exception as e:
-                    logger.warning(f"
+                    logger.warning(f"Currency inference failed: {e}")
                     data["currency"] = data.get("currency", "XOF")

+        # Update state
         state.update(
             allowed=data.get("allowed", False),
             status=data.get("status"),

@@ -372,9 +434,11 @@ async def intent_node(state: Dict) -> Dict:
         )

         logger.info(
-            f"
+            f"Intent node processed",
             extra={
                 "intent": data.get("intent"),
+                "base_role": base_user_role,
+                "dynamic_role": state.get("dynamic_role"),
                 "status": state.get("status"),
             }
         )
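The added infer_listing_type helper depends only on the listing_type and price_type keys of the state dict, so its three branches can be exercised in isolation. A minimal sketch, assuming the module's model dependencies are importable in your environment (the import runs get_ml_extractor() at module load):

    # Sketch only: exercises infer_listing_type as defined in the hunk above.
    from app.ai.nodes.intent_node import infer_listing_type

    print(infer_listing_type({"listing_type": "sale"}))    # ("sale", 0.99) - explicit type wins
    print(infer_listing_type({"price_type": "nightly"}))   # ("short-stay", 0.95) - inferred from price_type
    print(infer_listing_type({"price_type": "monthly"}))   # ("rent", 0.95)
    print(infer_listing_type({}))                           # ("rent", 0.5) - default when nothing is known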
app/ai/prompts/system_prompt.txt
CHANGED
@@ -1,76 +1,89 @@
 You are Aida, Lojiz's friendly real-estate AI assistant.
-User role: {user_role} (landlord | renter)
+User role: {user_role} (landlord | renter | airbnb_host | roommate_seeker | seller | buyer)

 ------------------------------------------------------
 BRAND IDENTITY
 ------------------------------------------------------

+WHO YOU ARE:
 - You are AIDA, Lojiz's proprietary real-estate AI assistant
 - Do NOT mention DeepSeek, GPT, or any other LLM names
 - You are a standalone AI developed specifically for Lojiz
 - Never refer to yourself as based on other AI models

+IF ASKED "WHO ARE YOU?":
+"I'm Aida, Lojiz's AI real-estate assistant. I help landlords list properties, renters find homes, and more!"

+IF ASKED "WHO DESIGNED YOU?":
 "I was designed and developed by the Lojiz team. Our talented team includes:
+- Emeka Destiny Ebuka - Lead AI & ML Engineer/Developer
+- Uche Samuel Chukwuemeka - Product Designer
 Together, they created me to make real-estate transactions easier and more accessible for everyone."

-**IF ASKED "ARE YOU AN LLM?" OR "WHAT MODEL ARE YOU?"**
-"I'm Aida, Lojiz's specialized AI assistant. I'm not just a general-purpose AI - I'm specifically built to understand real-estate, help with property listings, and guide users through the Lojiz platform. That's what makes me different!"
+IF ASKED "ARE YOU AN LLM?":
+"I'm Aida, Lojiz's specialized AI assistant. I'm not just a general-purpose AI - I'm specifically built to understand real-estate, help with property listings, and guide users through the Lojiz platform."

-- ❌ "I was trained on general data"
-- ✅ Say "I'm Aida, Lojiz's AI assistant" instead
+NEVER SAY:
+- "I'm based on DeepSeek"
+- "I'm powered by GPT"
+- "I'm an LLM (Language Model)"

 ------------------------------------------------------
 PERSONALITY
 ------------------------------------------------------
 - Speak like a human, short and warm
-- IGNORE typos
+- IGNORE typos and grammar mistakes. Understand intent anyway
+- If user talks off-topic, reply casually and ask: "Would you like to list a property or search for one?"
+- Only switch to JSON when property details are mentioned
+- Be proud of being Aida - make it clear you're specialized in real-estate

 ------------------------------------------------------
+ROLE-BASED BEHAVIOR
 ------------------------------------------------------
-- ⚠️ IMPORTANT: If a user corrects ANY field (location, bedrooms, price, etc.), PRESERVE that correction
-- Do NOT regenerate or revert corrections when showing previews
-- Example: If user says "actually it's in Calavi not Cotonou", update state AND keep it that way
-- When showing draft preview, always use the LATEST corrected values from state
-- If a user requests changes (amenity icons, requirements, etc.), apply them WITHOUT reverting previous corrections

+Detect and adapt to different user roles:
+
+LANDLORD:
+- Help list properties for rent or sale
+- Focus on: location, price, amenities, tenant requirements
+- Price types: monthly, yearly
+- Listing types: rent, short-stay, sale
+
+RENTER/TENANT:
+- Help search for rental properties
+- Focus on: budget, location, amenities needed
+- Can also list rooms for roommate matching
+- Listing types: roommate
+
+AIRBNB HOST:
+- Help list short-stay vacation properties
+- Focus on: guest amenities, location convenience
+- Price types: nightly, daily, weekly
+- Listing type: short-stay
+
+ROOMMATE SEEKER:
+- Help find rooms to share or advertise spare rooms
+- Focus on: compatibility, house rules, utilities included
+- Price types: monthly, yearly
+- Listing type: roommate
+
+SELLER (Property Sale):
+- Help list properties for sale
+- Focus on: property condition, location, price
+- Listing type: sale
+
+BUYER:
+- Help search for properties to purchase
+- Focus on: budget range, location, property type
+
+------------------------------------------------------
+PRESERVE USER CORRECTIONS
+------------------------------------------------------
+IMPORTANT: If a user corrects ANY field (location, bedrooms, price, etc.), PRESERVE that correction
+- Do NOT regenerate or revert corrections when showing previews
+- Example: If user says "actually it's in Calavi not Cotonou", update state AND keep it that way
+- When showing draft preview, always use the LATEST corrected values
+- If a user requests changes, apply them WITHOUT reverting previous corrections

 ------------------------------------------------------
 LISTING CREATION - PROGRESSIVE COLLECTION

@@ -91,27 +104,26 @@ OPTIONAL FIELDS (Ask, but not required):
 AUTO-GENERATED:
 - title (AI generates from location, bedrooms, listing_type)
 - description (AI generates professional description)
-- currency (auto-detect from location: Lagos
-- amenities_with_icons (backend adds icons to all amenities)
+- currency (auto-detect from location: Lagos=NGN, Cotonou=XOF, etc.)

 LOCATION EXTRACTION:
 - Extract ONLY the city/area name
 - Ignore long descriptions
 - Examples:
-  "calavi quartier zogbadje"
-  "VI in Lagos"
-  "Lekki, Lagos"
+  "calavi quartier zogbadje" -> location: "calavi"
+  "VI in Lagos" -> location: "lagos"
+  "Lekki, Lagos" -> location: "lagos"

 LISTING TYPE AUTO-DETECTION:
-- "for rent" / "monthly" / "yearly"
-- "short stay" / "nightly" / "daily" / "weekly"
-- "for sale" / "selling"
-- "roommate" / "sharing" / "flatmate"
+- "for rent" / "monthly" / "yearly" -> rent
+- "short stay" / "nightly" / "daily" / "weekly" -> short-stay
+- "for sale" / "selling" -> sale
+- "roommate" / "sharing" / "flatmate" -> roommate

 PRICE TYPE AUTO-DETECTION:
-- "monthly" / "month" / "per month" / "mth"
-- "nightly" / "night" / "per night" / "daily" / "day"
-- "yearly" / "year" / "per year" / "annum"
+- "monthly" / "month" / "per month" / "mth" -> monthly
+- "nightly" / "night" / "per night" / "daily" / "day" -> nightly
+- "yearly" / "year" / "per year" / "annum" -> yearly

 PROGRESSIVE COLLECTION FLOW:
 1. User provides initial info (may be incomplete)

@@ -120,10 +132,10 @@ PROGRESSIVE COLLECTION FLOW:
 4. Ask for missing fields ONE AT A TIME
 5. User provides each field (or corrects previous ones)
 6. Append/update to existing fields
-7. When ALL required fields complete
-8. When amenities/requirements collected
+7. When ALL required fields complete -> Ask for optional fields
+8. When amenities/requirements collected -> Generate DRAFT
 9. Show DRAFT preview to user
-10. User can request changes
+10. User can request changes - PRESERVE all corrections
 11. User reviews and says "publish" to confirm

 ------------------------------------------------------

@@ -135,7 +147,7 @@ When user starts listing a property:
 2. Check for missing REQUIRED fields
 3. Ask missing fields one by one
 4. Build up state progressively
-5.
+5. PRESERVE all corrections and changes

 Response format while collecting:
 {

@@ -150,7 +162,7 @@ Response format while collecting:
   "requirements": null,
   "status": "collecting",
   "missing_fields": ["amenities", "requirements"],
-  "next_question": "Any amenities? (e.g., wifi, parking, balcony, pool, furnished, kitchen
+  "next_question": "Any amenities? (e.g., wifi, parking, balcony, pool, furnished, kitchen)",
   "ai_reply": "Great! I have: 2-bed in Lagos, 50k/month. Any amenities?"
 }

@@ -171,71 +183,8 @@ When ALL required fields complete:
   "draft_preview": null
 }

-When amenities/requirements provided:
-{
-  "intent": "list",
-  "status": "draft_ready",
-  "ai_reply": "Perfect! Let me prepare your listing draft...",
-  "draft_preview": {
-    ...listing data...
-    "amenities_with_icons": "📶 Wifi | 🅿️ Parking | 🧼 Washing Machine | 🔥 Dryer"
-  }
-}
-
-------------------------------------------------------
-EXAMPLES - LISTING CREATION
-------------------------------------------------------
-
-User: "I want to list my 2-bed apartment in Lagos for rent, 50k monthly"
-{
-  "intent": "list",
-  "location": "lagos",
-  "bedrooms": 2,
-  "bathrooms": null,
-  "price": 50000,
-  "listing_type": "rent",
-  "price_type": "monthly",
-  "amenities": [],
-  "requirements": null,
-  "status": "collecting",
-  "missing_fields": ["bathrooms"],
-  "next_question": "How many bathrooms?",
-  "ai_reply": "Got it! 2-bed in Lagos, 50k/month. How many bathrooms?"
-}
-
-User: "1 bathroom, with wifi and parking"
-{
-  "intent": "list",
-  "location": "lagos",
-  "bedrooms": 2,
-  "bathrooms": 1,
-  "price": 50000,
-  "listing_type": "rent",
-  "price_type": "monthly",
-  "amenities": ["wifi", "parking"],
-  "requirements": null,
-  "status": "checking_optional",
-  "missing_fields": [],
-  "ai_reply": "Perfect! Any special requirements for renters?",
-}
-
-User: "actually it's in Calavi not Lagos"
-{
-  "intent": "list",
-  "location": "calavi",
-  "bedrooms": 2,
-  "bathrooms": 1,
-  "price": 50000,
-  "listing_type": "rent",
-  "price_type": "monthly",
-  "amenities": ["wifi", "parking"],
-  "requirements": null,
-  "status": "checking_optional",
-  "ai_reply": "Got it! Updated to Calavi. Any special requirements for renters?",
-}
-
 ------------------------------------------------------
 SEARCH INTENT
 ------------------------------------------------------
 When user searches, extract and COMBINE ALL mentioned filters:
 - location: city/area (handle typos)

@@ -270,11 +219,13 @@ CRITICAL RULES
 4. For listing creation: collect required fields progressively
 5. For search: extract all filters at once
 6. Return ONLY JSON (no markdown, no preamble)
-7. When all required fields complete
-8. When optional fields provided
+7. When all required fields complete -> ask for optional fields ONLY ONCE
+8. When optional fields provided -> show draft preview
 9. Wait for "publish" command to save listing
-10.
-11.
-12. BRAND RULE:
-13. BRAND RULE:
-14.
+10. PRESERVE user corrections - never revert them
+11. BRAND RULE: Always refer to yourself as AIDA, never mention other LLMs
+12. BRAND RULE: Give credit to Lojiz team when asked about your creators
+13. BRAND RULE: Stand your ground as a specialized real-estate AI
+14. ROLE RULE: Detect and adapt to user role
+15. ROLE RULE: Adjust required fields based on user role
+16. ROLE RULE: Use role-specific language and focus areas
app/ai/routes/chat.py
CHANGED
@@ -1,4 +1,4 @@
-# app/ai/routes/chat.py - Enhanced with
+# app/ai/routes/chat.py - Enhanced with Role Detection
 from fastapi import APIRouter, Depends, HTTPException, Request
 from fastapi.security import HTTPBearer
 from pydantic import BaseModel

@@ -34,6 +34,25 @@ class AskBody(BaseModel):
     user_role: Optional[str] = None
     history: Optional[List[MessageHistory]] = None

+# ============================================================
+# User Role Detection Helper
+# ============================================================
+
+async def detect_user_role_and_model(user_message: str):
+    """Detect user role and rental model from message"""
+    try:
+        from app.ml.models.user_role_context_handler import UserRoleDetector
+
+        role_detector = UserRoleDetector()
+        detected_role, role_confidence = role_detector.detect_user_role(user_message.lower())
+        detected_model = role_detector.detect_rental_model(user_message.lower(), None)
+
+        return detected_role, role_confidence, detected_model
+    except Exception as e:
+        logger.warning(f"Role detection failed: {e}")
+        return None, 0.0, None
+
+
 # ============================================================
 # Enhanced Chat Endpoint
 # ============================================================

@@ -45,14 +64,7 @@ async def ask_ai(
     request: Request,
     token: str = Depends(security),
 ):
-    """
-    Enhanced chat endpoint with:
-    - Rate limiting (token bucket)
-    - Distributed tracing
-    - Token tracking
-    - Error handling with observability
-    - Context management
-    """
+    """Enhanced chat endpoint with rate limiting and role detection"""

     start_time = time.time()
     request_id = request.headers.get("x-request-id", "unknown")

@@ -67,7 +79,7 @@
         }
     ) as root_span:
         try:
+            # Step 1: Validate Token
             with trace_operation("token_validation"):
                 payload = decode_access_token(token.credentials)
                 if not payload:

@@ -76,7 +88,7 @@
             user_id = payload["user_id"]
             user_role = payload.get("role", "renter")

+            # Step 2: Rate Limiting
             with trace_operation(
                 "rate_limit_check",
                 {"user_id": user_id, "operation": "chat"}

@@ -91,20 +103,46 @@
             if not is_allowed:
                 logger.warning(
-                    f"
+                    f"Rate limit exceeded for user: {user_id}",
                     extra={"rate_info": rate_info}
                 )
                 raise RateLimitExceeded(retry_after=60)

-            # Add rate limit headers
             root_span.set_attribute("rate_limit.remaining", rate_info["user"]["remaining"])
             root_span.set_attribute("rate_limit.capacity", rate_info["user"]["capacity"])

+            # Step 2b: User Role Detection (NEW)
+            with trace_operation("user_role_detection"):
+                try:
+                    detected_role, role_confidence, detected_model = await detect_user_role_and_model(
+                        body.message
+                    )
+
+                    if detected_role and role_confidence > 0.7:
+                        logger.info(
+                            f"User role detected",
+                            extra={
+                                "detected_role": detected_role,
+                                "confidence": role_confidence,
+                                "rental_model": detected_model.value if detected_model else None,
+                                "original_role": user_role,
+                            }
+                        )
+                        user_role = detected_role
+                        root_span.set_attribute("detected_role", detected_role)
+                        root_span.set_attribute("rental_model", detected_model.value if detected_model else None)
+                    else:
+                        logger.debug(f"Role detection inconclusive (confidence: {role_confidence:.0%})")
+                        root_span.set_attribute("role_confidence", role_confidence)
+
+                except Exception as e:
+                    logger.warning(f"Role detection failed: {e}")
+
+            # Step 3: Context Management
             with trace_operation("context_management", {"user_id": user_id}):
                 window = get_message_window(user_id)

                 # Build conversation context from history
                 conversation_context = ""
                 if body.history:
                     for msg in body.history:

@@ -122,7 +160,7 @@
             # Add to message window
             window.add_message("user", full_message)

+            # Step 4: AI Chat Processing
             with trace_operation(
                 "aida_chat_sync",
                 {

@@ -137,9 +175,8 @@
                 full_message,
             )

+            # Step 5: Token Tracking
             with trace_operation("token_tracking"):
-                # Track tokens if available
                 usage = final_state.get("token_usage", {})
                 if usage:
                     tracker = get_token_tracker()

@@ -151,7 +188,7 @@
                     usage.get("cost", 0.0),
                 )

+            # Step 6: Build Response
             response = {
                 "success": True,
                 "text": final_state.get("ai_reply", ""),

@@ -162,11 +199,12 @@
                     "request_id": request_id,
                     "processing_time_ms": int((time.time() - start_time) * 1000),
                     "user_id": user_id,
+                    "user_role": user_role,
                     "status": final_state.get("status"),
                 },
             }

+            # Step 7: Add Message to Window
             with trace_operation("window_update"):
                 window.add_message("assistant", final_state.get("ai_reply", ""))

@@ -179,9 +217,10 @@
             })

             logger.info(
-                f"
+                f"Chat processed successfully",
                 extra={
                     "user_id": user_id,
+                    "user_role": user_role,
                     "request_id": request_id,
                     "processing_time_ms": response["metadata"]["processing_time_ms"],
                     "has_cards": len(response["cards"]) > 0,

@@ -192,7 +231,7 @@
         except RateLimitExceeded as e:
             root_span.set_attribute("error.type", "rate_limit_exceeded")
-            logger.error(f"
+            logger.error(f"Rate limit: {str(e)}")
             raise HTTPException(
                 status_code=429,
                 detail=e.message,

@@ -206,7 +245,7 @@
             root_span.record_exception(e)
             root_span.set_attribute("error.type", type(e).__name__)
             logger.error(
-                f"
+                f"Chat endpoint error: {str(e)}",
                 exc_info=True,
                 extra={"user_id": user_id if 'user_id' in locals() else "unknown"}
             )

@@ -248,7 +287,7 @@ async def get_rate_limit_status(
 }

 # ============================================================
 # Health Check
 # ============================================================

 @router.get("/health")

@@ -264,5 +303,6 @@ async def chat_health() -> dict:
         "token_tracking": True,
         "context_management": True,
         "error_resilience": True,
+        "user_role_detection": True,
     },
 }
app/ai/services/dynamic_role_manager.py
ADDED
@@ -0,0 +1,299 @@
# app/ai/services/dynamic_role_manager.py - NEW
"""
Dynamic Role Management System
Maps user's base role (landlord/renter) to specific roles based on listing/search type
"""

import logging
from typing import Tuple
from enum import Enum

logger = logging.getLogger(__name__)


class BaseRole(Enum):
    """User's base role at signup"""
    LANDLORD = "landlord"
    RENTER = "renter"


class DynamicRole(Enum):
    """Dynamic roles based on action/listing type"""
    # Landlord-derived roles
    LANDLORD = "landlord"  # Creating rent listings
    HOST = "host"  # Creating short-stay listings
    SELLER = "seller"  # Creating sale listings

    # Renter-derived roles
    RENTER = "renter"  # Searching for rentals
    GUEST = "guest"  # Searching for short-stay
    BUYER = "buyer"  # Searching for properties to buy
    ROOMMATE_SEEKER = "roommate_seeker"  # Searching for rooms
    ROOMMATE_LISTER = "roommate_lister"  # Listing rooms (only renters)


class DynamicRoleManager:
    """Manage dynamic role assignment based on user action"""

    def __init__(self):
        logger.info("Dynamic Role Manager initialized")

    def get_dynamic_role_for_listing(
        self,
        base_role: str,
        listing_type: str
    ) -> Tuple[str, str, float]:
        """
        Get dynamic role when user creates a LISTING

        Args:
            base_role: User's base role at signup (landlord or renter)
            listing_type: Type of listing (rent, short-stay, sale, roommate)

        Returns:
            (dynamic_role, description, confidence)
        """

        base_role_lower = base_role.lower().strip()
        listing_type_lower = listing_type.lower().strip() if listing_type else None

        logger.info(f"Getting listing role: base={base_role_lower}, type={listing_type_lower}")

        # LANDLORD creating listings
        if base_role_lower == "landlord":

            # Landlord + short-stay = HOST
            if listing_type_lower == "short-stay":
                logger.info("Landlord creating short-stay -> HOST role")
                return DynamicRole.HOST.value, "Short-stay property host", 0.99

            # Landlord + sale = SELLER
            elif listing_type_lower == "sale":
                logger.info("Landlord creating sale -> SELLER role")
                return DynamicRole.SELLER.value, "Property seller", 0.99

            # Landlord + rent = LANDLORD (standard)
            elif listing_type_lower == "rent":
                logger.info("Landlord creating rent -> LANDLORD role")
                return DynamicRole.LANDLORD.value, "Rental property landlord", 0.99

            # Landlord cannot create roommate listings
            elif listing_type_lower == "roommate":
                logger.warning("Landlord cannot create roommate listings")
                return None, "Landlords cannot list for roommate matching", 0.0

        # RENTER creating listings
        elif base_role_lower == "renter":

            # Only renters can create roommate listings
            if listing_type_lower == "roommate":
                logger.info("Renter creating roommate -> ROOMMATE_LISTER role")
                return DynamicRole.ROOMMATE_LISTER.value, "Room share lister", 0.99

            # Renters cannot create rent/short-stay/sale listings
            else:
                logger.warning(f"Renter cannot create {listing_type_lower} listings")
                return None, "Renters can only list rooms for roommate matching", 0.0

        logger.warning(f"Unknown base role: {base_role_lower}")
        return None, "Unknown role", 0.0

    def get_dynamic_role_for_search(
        self,
        base_role: str,
        search_for: str = None
    ) -> Tuple[str, str, float]:
        """
        Get dynamic role when user SEARCHES for properties

        Args:
            base_role: User's base role at signup (landlord or renter)
            search_for: What they're searching for (rent, short-stay, sale, roommate)

        Returns:
            (dynamic_role, description, confidence)
        """

        base_role_lower = base_role.lower().strip()
        search_for_lower = search_for.lower().strip() if search_for else None

        logger.info(f"Getting search role: base={base_role_lower}, search_for={search_for_lower}")

        # LANDLORD searching
        if base_role_lower == "landlord":
            logger.warning("Landlords searching for properties - unusual but allowed")

            if search_for_lower == "short-stay":
                return DynamicRole.GUEST.value, "Short-stay guest (unusual)", 0.5
            elif search_for_lower == "sale":
                return DynamicRole.BUYER.value, "Property buyer (unusual)", 0.5
| 130 |
+
else:
|
| 131 |
+
return DynamicRole.RENTER.value, "Searching for rentals (unusual)", 0.5
|
| 132 |
+
|
| 133 |
+
# RENTER searching
|
| 134 |
+
elif base_role_lower == "renter":
|
| 135 |
+
|
| 136 |
+
# Searching for rentals = RENTER
|
| 137 |
+
if search_for_lower == "rent":
|
| 138 |
+
logger.info("Renter searching for rent -> RENTER role")
|
| 139 |
+
return DynamicRole.RENTER.value, "Rental searcher", 0.99
|
| 140 |
+
|
| 141 |
+
# Searching for short-stay = GUEST
|
| 142 |
+
elif search_for_lower == "short-stay":
|
| 143 |
+
logger.info("Renter searching for short-stay -> GUEST role")
|
| 144 |
+
return DynamicRole.GUEST.value, "Short-stay guest", 0.99
|
| 145 |
+
|
| 146 |
+
# Searching for sale = BUYER
|
| 147 |
+
elif search_for_lower == "sale":
|
| 148 |
+
logger.info("Renter searching for sale -> BUYER role")
|
| 149 |
+
return DynamicRole.BUYER.value, "Property buyer", 0.99
|
| 150 |
+
|
| 151 |
+
# Searching for roommate = ROOMMATE_SEEKER
|
| 152 |
+
elif search_for_lower == "roommate":
|
| 153 |
+
logger.info("Renter searching for roommate -> ROOMMATE_SEEKER role")
|
| 154 |
+
return DynamicRole.ROOMMATE_SEEKER.value, "Roommate seeker", 0.99
|
| 155 |
+
|
| 156 |
+
# Default search = RENTER
|
| 157 |
+
else:
|
| 158 |
+
logger.info("Renter searching (unspecified) -> RENTER role")
|
| 159 |
+
return DynamicRole.RENTER.value, "Property searcher", 0.95
|
| 160 |
+
|
| 161 |
+
logger.warning(f"Unknown base role: {base_role_lower}")
|
| 162 |
+
return None, "Unknown role", 0.0
|
| 163 |
+
|
| 164 |
+
def get_ai_prompt_for_role(self, dynamic_role: str) -> str:
|
| 165 |
+
"""
|
| 166 |
+
Get AI prompt context for specific dynamic role
|
| 167 |
+
|
| 168 |
+
Args:
|
| 169 |
+
dynamic_role: The dynamic role (e.g., "host", "seller", "guest")
|
| 170 |
+
|
| 171 |
+
Returns:
|
| 172 |
+
Prompt context string for AI
|
| 173 |
+
"""
|
| 174 |
+
|
| 175 |
+
prompts = {
|
| 176 |
+
"landlord": """
|
| 177 |
+
You are helping a landlord list a rental property.
|
| 178 |
+
Focus on: location, monthly/yearly price, bedrooms, bathrooms, furnished/unfurnished, utilities
|
| 179 |
+
Help collect: location, price (monthly), bedrooms, bathrooms, amenities, requirements for tenants
|
| 180 |
+
Pricing context: Monthly or yearly rental rates
|
| 181 |
+
""",
|
| 182 |
+
|
| 183 |
+
"host": """
|
| 184 |
+
You are helping a short-stay property host (Airbnb-style).
|
| 185 |
+
Focus on: location, nightly/daily price, guest amenities, cleanliness, WiFi, kitchen, parking
|
| 186 |
+
Help collect: location, price (nightly), bedrooms, bathrooms, amenities, house rules
|
| 187 |
+
Pricing context: Nightly, daily, or weekly rates
|
| 188 |
+
Emphasize: Guest experience, cleanliness, quick check-in/out, amenities
|
| 189 |
+
""",
|
| 190 |
+
|
| 191 |
+
"seller": """
|
| 192 |
+
You are helping someone sell a property.
|
| 193 |
+
Focus on: location, total sale price, property condition, bedrooms, bathrooms, unique features
|
| 194 |
+
Help collect: location, price (fixed), bedrooms, bathrooms, property type, amenities
|
| 195 |
+
Pricing context: Total sale price (not per month/night)
|
| 196 |
+
Emphasize: Investment potential, property condition, neighborhood, documentation status
|
| 197 |
+
""",
|
| 198 |
+
|
| 199 |
+
"renter": """
|
| 200 |
+
You are helping a renter find a rental property.
|
| 201 |
+
Focus on: budget, location, bedrooms, bathrooms, move-in date, lease terms
|
| 202 |
+
Ask questions about: Budget range, location preference, must-have amenities
|
| 203 |
+
Show filters for: Monthly price range, bedrooms, bathrooms, furnished/unfurnished
|
| 204 |
+
Emphasize: Affordability, proximity to work/school, security, utilities included
|
| 205 |
+
""",
|
| 206 |
+
|
| 207 |
+
"guest": """
|
| 208 |
+
You are helping someone find a short-stay property (Airbnb-style).
|
| 209 |
+
Focus on: budget, location, dates, guest amenities, proximity to attractions
|
| 210 |
+
Ask questions about: Check-in date, check-out date, budget per night, location preference
|
| 211 |
+
Show filters for: Nightly price range, bedrooms, host reviews, amenities
|
| 212 |
+
Emphasize: Guest reviews, cleanliness, host responsiveness, location convenience
|
| 213 |
+
""",
|
| 214 |
+
|
| 215 |
+
"buyer": """
|
| 216 |
+
You are helping someone buy a property.
|
| 217 |
+
Focus on: budget, location, property type, bedrooms, bathrooms, investment potential
|
| 218 |
+
Ask questions about: Total budget, location preference, property type, timeline
|
| 219 |
+
Show filters for: Price range, bedrooms, bathrooms, neighborhood, property type
|
| 220 |
+
Emphasize: Investment returns, property condition, financing options, neighborhood potential
|
| 221 |
+
""",
|
| 222 |
+
|
| 223 |
+
"roommate_seeker": """
|
| 224 |
+
You are helping someone find a room to share with a roommate.
|
| 225 |
+
Focus on: budget, location, roommate compatibility, house rules, utilities included
|
| 226 |
+
Ask questions about: Budget, location, move-in date, roommate preferences
|
| 227 |
+
Show filters for: Monthly budget, bedrooms available, location, utilities included
|
| 228 |
+
Emphasize: Affordable housing, roommate compatibility, house rules, community
|
| 229 |
+
""",
|
| 230 |
+
|
| 231 |
+
"roommate_lister": """
|
| 232 |
+
You are helping a renter list a room for roommate matching.
|
| 233 |
+
Focus on: location, monthly price, available rooms, house rules, utilities included
|
| 234 |
+
Help collect: location, price (monthly), rooms available, bathrooms, house rules, amenities
|
| 235 |
+
Pricing context: Monthly rates split between roommates
|
| 236 |
+
Emphasize: Roommate compatibility, house culture, shared amenities, community
|
| 237 |
+
""",
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
return prompts.get(dynamic_role, prompts["renter"])
|
| 241 |
+
|
| 242 |
+
def validate_role_action(
|
| 243 |
+
self,
|
| 244 |
+
base_role: str,
|
| 245 |
+
action: str,
|
| 246 |
+
listing_or_search_type: str
|
| 247 |
+
) -> Tuple[bool, str]:
|
| 248 |
+
"""
|
| 249 |
+
Validate if user (base role) can perform action with given type
|
| 250 |
+
|
| 251 |
+
Args:
|
| 252 |
+
base_role: User's base role (landlord or renter)
|
| 253 |
+
action: What user wants to do (list, search)
|
| 254 |
+
listing_or_search_type: Type (rent, short-stay, sale, roommate)
|
| 255 |
+
|
| 256 |
+
Returns:
|
| 257 |
+
(is_allowed, reason)
|
| 258 |
+
"""
|
| 259 |
+
|
| 260 |
+
base_role_lower = base_role.lower().strip()
|
| 261 |
+
action_lower = action.lower().strip()
|
| 262 |
+
type_lower = listing_or_search_type.lower().strip()
|
| 263 |
+
|
| 264 |
+
logger.info(f"Validating: {base_role_lower} {action_lower} {type_lower}")
|
| 265 |
+
|
| 266 |
+
# LANDLORD rules for LISTING
|
| 267 |
+
if base_role_lower == "landlord" and action_lower == "list":
|
| 268 |
+
if type_lower in ["rent", "short-stay", "sale"]:
|
| 269 |
+
return True, f"Landlord can list {type_lower}"
|
| 270 |
+
elif type_lower == "roommate":
|
| 271 |
+
return False, "Landlords cannot list for roommate matching. Only renters can share rooms."
|
| 272 |
+
else:
|
| 273 |
+
return False, f"Unknown listing type: {type_lower}"
|
| 274 |
+
|
| 275 |
+
# RENTER rules for LISTING
|
| 276 |
+
if base_role_lower == "renter" and action_lower == "list":
|
| 277 |
+
if type_lower == "roommate":
|
| 278 |
+
return True, "Renters can list rooms for roommate matching"
|
| 279 |
+
elif type_lower in ["rent", "short-stay", "sale"]:
|
| 280 |
+
return False, "Renters can only list rooms for roommate matching"
|
| 281 |
+
else:
|
| 282 |
+
return False, f"Unknown listing type: {type_lower}"
|
| 283 |
+
|
| 284 |
+
# SEARCH rules (both can search anything)
|
| 285 |
+
if action_lower == "search":
|
| 286 |
+
return True, f"Can search for {type_lower}"
|
| 287 |
+
|
| 288 |
+
return False, f"Invalid action: {action_lower}"
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
# Singleton
|
| 292 |
+
_manager = None
|
| 293 |
+
|
| 294 |
+
def get_dynamic_role_manager() -> DynamicRoleManager:
|
| 295 |
+
"""Get or create singleton"""
|
| 296 |
+
global _manager
|
| 297 |
+
if _manager is None:
|
| 298 |
+
_manager = DynamicRoleManager()
|
| 299 |
+
return _manager
|
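For orientation, a minimal usage sketch of the manager above (not part of the commit); the import path mirrors the file location, and the return values follow the rules defined in the class:

    from app.ai.services.dynamic_role_manager import get_dynamic_role_manager

    manager = get_dynamic_role_manager()

    # Landlord creating a short-stay listing resolves to the "host" role
    role, description, confidence = manager.get_dynamic_role_for_listing("landlord", "short-stay")
    # -> ("host", "Short-stay property host", 0.99)

    # Renter searching for a property to buy resolves to the "buyer" role
    role, description, confidence = manager.get_dynamic_role_for_search("renter", "sale")
    # -> ("buyer", "Property buyer", 0.99)

    # Guard an action before starting a listing flow
    allowed, reason = manager.validate_role_action("renter", "list", "rent")
    # -> (False, "Renters can only list rooms for roommate matching")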
app/ml/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (180 Bytes).
app/ml/models/combined_training_data.csv
ADDED
@@ -0,0 +1,4 @@
price,bedrooms,bathrooms,location,dataset_source,price_per_bedroom,price_per_bathroom,total_rooms,price_range
250000,2,1,Unknown,dummy,125000.0,250000.0,3,mid
350000,3,2,Unknown,dummy,116666.66666666667,175000.0,5,mid
450000,4,3,Unknown,dummy,112500.0,150000.0,7,mid
app/ml/models/combined_training_data.jsonl
ADDED
@@ -0,0 +1,3 @@
{"price": 250000, "bedrooms": 2, "bathrooms": 1, "location": "Unknown", "dataset_source": "dummy", "price_per_bedroom": 125000.0, "price_per_bathroom": 250000.0, "total_rooms": 3, "price_range": "mid"}
{"price": 350000, "bedrooms": 3, "bathrooms": 2, "location": "Unknown", "dataset_source": "dummy", "price_per_bedroom": 116666.66666666667, "price_per_bathroom": 175000.0, "total_rooms": 5, "price_range": "mid"}
{"price": 450000, "bedrooms": 4, "bathrooms": 3, "location": "Unknown", "dataset_source": "dummy", "price_per_bedroom": 112500.0, "price_per_bathroom": 150000.0, "total_rooms": 7, "price_range": "mid"}
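The derived columns in these rows follow directly from the base fields; a small illustrative helper (hypothetical, not part of the commit) that reproduces them:

    def derive_features(price: float, bedrooms: int, bathrooms: int) -> dict:
        # Reproduces the derived columns shown in the training rows above
        return {
            "price_per_bedroom": price / bedrooms,
            "price_per_bathroom": price / bathrooms,
            "total_rooms": bedrooms + bathrooms,
        }

    derive_features(250000, 2, 1)
    # {'price_per_bedroom': 125000.0, 'price_per_bathroom': 250000.0, 'total_rooms': 3}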
app/ml/models/combined_training_data.parquet
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b21dc388814014d2ac5628bc908cd8d1eb26b796b80e32d085602570bb37e457
size 6104
app/ml/models/dataset_info.json
ADDED
@@ -0,0 +1,31 @@
{
  "rows": 3,
  "cols": [
    "price",
    "bedrooms",
    "bathrooms",
    "location",
    "dataset_source",
    "price_per_bedroom",
    "price_per_bathroom",
    "total_rooms",
    "price_range"
  ],
  "price": {
    "mean": 350000.0,
    "median": 350000.0,
    "min": 250000.0,
    "max": 450000.0
  },
  "bedrooms": {
    "mean": 3.0,
    "min": 2,
    "max": 4
  },
  "bathrooms": {
    "mean": 2.0,
    "min": 1,
    "max": 3
  },
  "locations": 1
}
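The statistics in dataset_info.json match the three dummy rows in combined_training_data.csv; a sketch of how they could be regenerated with pandas (assumed available in the training environment, not part of the commit):

    import json
    import pandas as pd

    df = pd.read_csv("app/ml/models/combined_training_data.csv")

    info = {
        "rows": len(df),
        "cols": list(df.columns),
        "price": {
            "mean": float(df["price"].mean()),      # 350000.0
            "median": float(df["price"].median()),  # 350000.0
            "min": float(df["price"].min()),
            "max": float(df["price"].max()),
        },
        "bedrooms": {"mean": float(df["bedrooms"].mean()), "min": int(df["bedrooms"].min()), "max": int(df["bedrooms"].max())},
        "bathrooms": {"mean": float(df["bathrooms"].mean()), "min": int(df["bathrooms"].min()), "max": int(df["bathrooms"].max())},
        "locations": int(df["location"].nunique()),  # 1 ("Unknown")
    }

    with open("app/ml/models/dataset_info.json", "w") as f:
        json.dump(info, f, indent=2)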
app/ml/models/field_models.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:02ac017f49114480a299778b1190bcd6a30a30f723abcfae85e34b2331e73c7f
size 91689397
app/ml/models/ml_listing_extractor.py
CHANGED
@@ -1,4 +1,4 @@
-# app/ml/ml_listing_extractor.py - Complete ML Extractor
+# app/ml/models/ml_listing_extractor.py - Complete ML Extractor
import json
import numpy as np
from typing import Dict, List, Tuple, Optional
@@ -6,8 +6,6 @@ import logging
import os
import re
from datetime import datetime, timedelta
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.preprocessing import LabelEncoder
import joblib
from sentence_transformers import SentenceTransformer
import aiohttp
@@ -28,7 +26,7 @@ class CurrencyManager:
        self.cache_expiry = {}
        self.cache_ttl = 86400  # 24 hours

-        logger.info("
+        logger.info("Currency Manager initialized")

    async def get_currency_for_location(self, location: str) -> Tuple[str, str, str, float]:
        """Get currency for a location using geolocation API"""
@@ -39,7 +37,7 @@ class CurrencyManager:
        if location_lower in self.location_cache:
            cached = self.location_cache[location_lower]
            if self._is_cache_valid(f"location_{location_lower}"):
-                logger.info(f"
+                logger.info(f"Location cache hit: {location} -> {cached['city']} -> {cached['currency']}")
                return cached['currency'], cached['country'], cached['city'], 0.95

        try:
@@ -85,13 +83,13 @@ class CurrencyManager:
            }
            self._set_cache_expiry(f"location_{location_lower}")

-            logger.info(f"
+            logger.info(f"Found: {location} -> {city}, {country} -> {currency}")
            return currency, country, city, 0.93

        except Exception as e:
-            logger.warning(f"
+            logger.warning(f"Failed to get currency for location {location}: {e}")

-        logger.warning(f"
+        logger.warning(f"Could not determine currency for {location}")
        return None, None, location, 0.0

    async def _get_currency_for_country(self, country_name: str, country_code: str = None) -> Optional[str]:
@@ -111,7 +109,7 @@ class CurrencyManager:
            currencies = data[0].get('currencies', {})
            if currencies:
                currency_code = list(currencies.keys())[0]
-                logger.info(f"
+                logger.info(f"Country {country_name} ({country_code}) -> {currency_code}")
                return currency_code

        except Exception as e:
@@ -131,7 +129,7 @@ class CurrencyManager:
        cache_key = f"{from_currency}_rates"
        if cache_key in self.exchange_rate_cache:
            if self._is_cache_valid(cache_key):
-                logger.info(f"
+                logger.info(f"Exchange rate cache hit for {from_currency}")
                return self.exchange_rate_cache[cache_key]

        try:
@@ -152,11 +150,11 @@ class CurrencyManager:
            self.exchange_rate_cache[cache_key] = filtered_rates
            self._set_cache_expiry(cache_key)

-            logger.info(f"
+            logger.info(f"Fetched exchange rates for {from_currency}")
            return filtered_rates

        except Exception as e:
-            logger.error(f"
+            logger.error(f"Failed to fetch exchange rates: {e}")

        return {}

@@ -176,17 +174,17 @@ class CurrencyManager:
        self.location_cache.clear()
        self.exchange_rate_cache.clear()
        self.cache_expiry.clear()
-        logger.info("
+        logger.info("Currency caches cleared")


class MLListingExtractor:
-    """ML-powered field extractor with SMART INFERENCE"""
+    """ML-powered field extractor with SMART INFERENCE + TRAINED MODELS"""

    def __init__(self, model_dir: str = "app/ml/models"):
-        self.model_dir = model_dir
+        self.model_dir = model_dir
        os.makedirs(model_dir, exist_ok=True)

-        logger.info("
+        logger.info("Loading ML Listing Extractor...")

        # Embedder for semantic similarity
        try:
@@ -201,7 +199,7 @@ class MLListingExtractor:
        # Currency manager with live APIs
        self.currency_mgr = CurrencyManager()

-        #
+        # Load trained field models
        self.field_models = self._load_field_models()

        # Learning history
@@ -209,11 +207,11 @@ class MLListingExtractor:
        self.field_patterns = {}
        self.user_corrections = {}

-        logger.info("
+        logger.info("ML Extractor loaded with live currency APIs & trained models")

    def _load_field_models(self) -> Dict:
-        """Load
+        """Load trained field ML models from enhanced training"""

        possible_paths = [
            f"{self.model_dir}/field_models.pkl",
            "app/ml/models/field_models.pkl",
@@ -224,13 +222,20 @@ class MLListingExtractor:
        if os.path.exists(models_file):
            try:
                models = joblib.load(models_file)
-
+                logger.info(f"Loaded trained field models from {models_file}")
+                logger.info(f"  - location_classifier: {models.get('location_classifier') is not None}")
+                logger.info(f"  - price_model: {models.get('price_model') is not None}")
+                logger.info(f"  - price_patterns: {len(models.get('price_patterns', {}))} locations")
+
                return models
            except Exception as e:
                logger.warning(f"Failed to load models from {models_file}: {e}")

-        logger.info("
-        logger.info("
+        logger.info("No trained field models found.")
+        logger.info("  To train: python scripts/download_training_data.py")
+        logger.info("  Then: python scripts/train_models.py")

        # Return empty models dict as fallback
        return {
@@ -239,59 +244,57 @@ class MLListingExtractor:
            "price_patterns": {},
            "amenity_frequencies": {},
            "embedder": None,
+            "scaler": None,
        }

-    # ==================== SMART LISTING TYPE INFERENCE ====================
    def infer_listing_type(self, state: Dict, user_role: str = None, user_message: str = None) -> Tuple[str, float]:
-        """Intelligently infer listing_type
+        """Intelligently infer listing_type"""

        explicit_type = state.get("listing_type")
        price_type = state.get("price_type")

-        # 1
+        # 1. If explicitly stated, use it
        if explicit_type and explicit_type in ["rent", "short-stay", "sale", "roommate"]:
-            logger.info(f"
+            logger.info(f"Listing type explicit: {explicit_type}")
            return explicit_type, 0.99

-        # 2
+        # 2. User role matters FIRST
        if user_role:
            user_role_lower = user_role.lower().strip()

            # If user is renter, ALWAYS roommate
            if user_role_lower == "renter":
-                logger.info(
+                logger.info("User is renter -> roommate listing")
                return "roommate", 0.98

            # If user is landlord, check other signals
            if user_role_lower == "landlord":
-                # Check user message for sale keywords
                if user_message:
                    msg_lower = user_message.lower()
                    sale_keywords = ["sell", "sale", "selling", "for sale", "purchase", "buy"]

                    if any(keyword in msg_lower for keyword in sale_keywords):
-                        logger.info(
+                        logger.info("Detected sale keywords -> sale")
                        return "sale", 0.95

-        # 3
+        # 3. Infer from price_type
        if price_type:
            price_type_lower = price_type.lower().strip()

            # Short-stay indicators
            if price_type_lower in ["nightly", "daily", "weekly", "night", "day", "week"]:
-                logger.info(f"
+                logger.info(f"Inferred short-stay from price_type: {price_type}")
                return "short-stay", 0.95

            # Monthly/Yearly = RENT
            elif price_type_lower in ["monthly", "yearly", "month", "year", "mth", "yr"]:
-                logger.info(f"
+                logger.info(f"Inferred rent from price_type: {price_type}")
                return "rent", 0.95

-        # 4
-        logger.warning("
+        # 4. Default to rent
+        logger.warning("Could not infer listing_type, defaulting to rent")
        return "rent", 0.5

-    # ==================== SMART CURRENCY INFERENCE ====================
    async def infer_currency(self, state: Dict) -> Tuple[str, str, float]:
        """Intelligently infer currency from location"""

@@ -299,29 +302,28 @@ class MLListingExtractor:
        location = state.get("location")
        listing_type = state.get("listing_type")

-        # 1
+        # 1. If explicitly stated
        if explicit_currency and len(explicit_currency) == 3:
-            logger.info(f"
+            logger.info(f"Currency explicit: {explicit_currency}")
            return explicit_currency, location, 0.99

-        # 2
+        # 2. Short-stay always in USD
        if listing_type == "short-stay":
-            logger.info(
+            logger.info("Short-stay detected, using USD")
            return "USD", location, 0.98

-        # 3
+        # 3. Infer from location
        if location:
            currency, country, city, confidence = await self.currency_mgr.get_currency_for_location(location)

            if currency:
-                logger.info(f"
+                logger.info(f"Extracted city: {location} -> {city}, {country} -> {currency}")
                state["location"] = city
                return currency, city, confidence

-        logger.warning("
+        logger.warning("Could not infer currency, will ask user")
        return None, location, 0.0

-    # ==================== PRICE CONVERSION FOR DISPLAY ====================
    async def convert_price_for_display(self, price: float, from_currency: str, to_currency: str = "USD") -> Dict:
        """Convert price using LIVE exchange rates"""

@@ -346,7 +348,7 @@ class MLListingExtractor:
            from_symbol = symbols.get(from_currency, from_currency)
            to_symbol = symbols.get(to_currency, to_currency)

-            formatted = f"{from_symbol}{price:,} (
+            formatted = f"{from_symbol}{price:,} (approx {to_symbol}{display_price:,.2f})"

            return {
                "original_price": price,
@@ -358,7 +360,7 @@ class MLListingExtractor:
            }

        except Exception as e:
-            logger.error(f"
+            logger.error(f"Failed to convert price: {e}")

            return {
                "original_price": price,
@@ -375,14 +377,14 @@ class MLListingExtractor:

        symbol_map = {
            "USD": "$",
-            "EUR": "
-            "GBP": "
-            "NGN": "
-            "XOF": "
-            "KES": "
-            "GHS": "
-            "ZAR": "
-            "AED": "
+            "EUR": "EUR",
+            "GBP": "GBP",
+            "NGN": "NGN",
+            "XOF": "XOF",
+            "KES": "KES",
+            "GHS": "GHS",
+            "ZAR": "ZAR",
+            "AED": "AED",
        }

        for currency in currencies:
@@ -446,15 +448,14 @@ class MLListingExtractor:
                "confidence": 0.93
            }

-            logger.info(f"
+            logger.info(f"Extracted location: {address} -> {city}")
            return city, location_info

        except Exception as e:
-            logger.error(f"
+            logger.error(f"Failed to extract location from address: {e}")

            return None, {}

-    # ==================== FIELD VALIDATION ====================
    def validate_field(self, field_name: str, value: any, user_input: str, user_id: str = None) -> Dict:
        """Validate a single field"""
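A minimal usage sketch of the inference path changed above (not part of the commit; the state keys follow the code shown, and constructing MLListingExtractor loads the embedder and any trained models):

    import asyncio
    from app.ml.models.ml_listing_extractor import MLListingExtractor

    extractor = MLListingExtractor()

    state = {"listing_type": None, "price_type": "nightly", "location": "Cotonou", "currency": None}

    # No explicit type and the user is not a renter, so rule 3 fires on the nightly price_type
    listing_type, confidence = extractor.infer_listing_type(state, user_role="landlord")
    # -> ("short-stay", 0.95)

    # Currency inference is async because it may hit the live geolocation API;
    # rule 2 short-circuits short-stay listings to USD
    state["listing_type"] = listing_type
    currency, city, conf = asyncio.run(extractor.infer_currency(state))
    # -> ("USD", "Cotonou", 0.98)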
app/ml/models/user_role_context_handler.py
ADDED
@@ -0,0 +1,607 @@
#!/usr/bin/env python3
"""
user_role_context_handler.py - Handle different user roles and rental models
Supports: Airbnb (host/guest), African rentals (landlord/renter/tenant)
"""

import logging
from typing import Dict, Tuple, Optional
from enum import Enum
import re

logger = logging.getLogger(__name__)


class RentalModel(Enum):
    """Different rental models"""
    AIRBNB = "airbnb"  # Short-stay, host/guest model
    AFRICAN_RENTAL = "african"  # Long-term rent, landlord/tenant model
    ROOMMATE = "roommate"  # Room sharing in existing space
    MIXED = "mixed"  # Both types possible
    UNKNOWN = "unknown"


class UserRole:
    """Handle different user roles across rental models"""

    # Airbnb roles
    AIRBNB_HOST = "airbnb_host"
    AIRBNB_GUEST = "airbnb_guest"

    # African rental roles
    LANDLORD = "landlord"
    RENTER = "renter"
    TENANT = "tenant"  # Alias for renter

    # Roommate roles
    HOMEOWNER_SEEKING_ROOMMATE = "homeowner_seeking_roommate"  # Has space, looking for roommate
    ROOMMATE_SEEKER = "roommate_seeker"  # Looking for a room to share

    # Generic
    OWNER = "owner"
    BUYER = "buyer"
    SELLER = "seller"


class UserRoleDetector:
    """Intelligently detect user role from context"""

    def __init__(self):
        # Keywords for role detection
        self.host_keywords = {
            "airbnb": ["host", "hosting", "list my property", "list my place", "rent out", "share"],
            "african": ["landlord", "owner", "property owner", "im renting out", "im listing"]
        }

        self.guest_keywords = {
            "airbnb": ["guest", "book", "looking for place", "need accommodation", "airbnb"],
            "african": ["renter", "tenant", "looking to rent", "seeking", "want to rent", "im looking for"]
        }

        self.buyer_keywords = ["buy", "purchase", "for sale", "selling", "acquire"]
        self.seller_keywords = ["sell", "selling", "sale", "list for sale"]

        # Roommate keywords
        self.homeowner_seeking_roommate_keywords = [
            "looking for a roommate", "need a roommate", "seeking roommate",
            "want to share my", "have a spare room", "room available",
            "looking to share", "share my apartment", "share my house",
            "my place is too big", "extra room", "can share"
        ]

        self.roommate_seeker_keywords = [
            "looking for a room", "seeking a room", "need a room",
            "looking for roommate", "want to share a place", "room for rent",
            "share accommodation", "shared apartment", "shared house",
            "need accommodation", "looking for a place to share"
        ]

        logger.info("🔍 User Role Detector initialized")

    def detect_rental_model(self, user_message: str, location: str = None) -> RentalModel:
        """Detect which rental model user is in"""

        msg_lower = user_message.lower().strip()

        # Keywords indicating Airbnb model
        airbnb_indicators = ["airbnb", "short stay", "nightly", "daily", "vacation rental", "host"]

        # Keywords indicating African rental model
        african_indicators = ["landlord", "tenant", "renter", "monthly rent", "long term", "furnished room"]

        # Keywords indicating roommate model
        roommate_indicators = ["roommate", "share my", "spare room", "share apartment", "shared house", "share a place"]

        # Check for explicit indicators
        for indicator in roommate_indicators:
            if indicator in msg_lower:
                logger.info(f"🏘️ Detected roommate model: '{indicator}'")
                return RentalModel.ROOMMATE

        for indicator in airbnb_indicators:
            if indicator in msg_lower:
                logger.info(f"🏨 Detected Airbnb model: '{indicator}'")
                return RentalModel.AIRBNB

        for indicator in african_indicators:
            if indicator in msg_lower:
                logger.info(f"🏢 Detected African rental model: '{indicator}'")
                return RentalModel.AFRICAN_RENTAL

        # Location-based inference (African locations more likely = African model)
        if location:
            african_countries = ["benin", "nigeria", "kenya", "ghana", "south africa", "uganda", "senegal"]
            if any(country in location.lower() for country in african_countries):
                logger.info(f"📍 African location detected: {location}")
                return RentalModel.AFRICAN_RENTAL

        # Default to mixed
        return RentalModel.MIXED

    def detect_user_role(self, user_message: str, rental_model: RentalModel = None) -> Tuple[str, float]:
        """
        Detect user role from message
        Returns: (role, confidence)
        """

        msg_lower = user_message.lower().strip()

        if rental_model is None:
            rental_model = self.detect_rental_model(user_message)

        # ==================== SELLER / LANDLORD ====================

        # Check for explicit landlord/owner language
        landlord_explicit = ["im a landlord", "im the landlord", "i own", "i own this", "as a landlord"]
        for phrase in landlord_explicit:
            if phrase in msg_lower:
                logger.info(f"✅ Explicit landlord detected: '{phrase}'")
                return UserRole.LANDLORD, 0.99

        # Check for listing/rental language
        if rental_model == RentalModel.AFRICAN_RENTAL:
            landlord_signals = [
                "im listing", "list my", "im renting out", "property for rent",
                "available for rent", "i have a", "i own a"
            ]
            for signal in landlord_signals:
                if signal in msg_lower:
                    logger.info(f"🏠 African landlord signal: '{signal}'")
                    return UserRole.LANDLORD, 0.90

        if rental_model == RentalModel.AIRBNB:
            host_signals = ["im hosting", "im a host", "list on airbnb", "airbnb host", "share my place"]
            for signal in host_signals:
                if signal in msg_lower:
                    logger.info(f"🏨 Airbnb host signal: '{signal}'")
                    return UserRole.AIRBNB_HOST, 0.90

        # ==================== BUYER / SELLER (SALE) ====================

        # Explicit sale language
        seller_signals = ["im selling", "for sale", "sell my", "selling property", "list for sale"]
        for signal in seller_signals:
            if signal in msg_lower:
                logger.info(f"💰 Seller detected: '{signal}'")
                return UserRole.SELLER, 0.95

        buyer_signals = ["want to buy", "looking to purchase", "im buying", "purchase property"]
        for signal in buyer_signals:
            if signal in msg_lower:
                logger.info(f"💳 Buyer detected: '{signal}'")
                return UserRole.BUYER, 0.95

        # ==================== RENTER / GUEST ====================

        # Check for explicit renter language
        renter_explicit = ["im a tenant", "im a renter", "im looking to rent", "looking for a place to rent"]
        for phrase in renter_explicit:
            if phrase in msg_lower:
                logger.info(f"✅ Explicit renter/tenant detected: '{phrase}'")

                if rental_model == RentalModel.AFRICAN_RENTAL:
                    return UserRole.TENANT, 0.99
                else:
                    return UserRole.AIRBNB_GUEST, 0.99

        # ==================== ROOMMATE ROLES ====================

        # Homeowner seeking roommate
        for keyword in self.homeowner_seeking_roommate_keywords:
            if keyword in msg_lower:
                logger.info(f"✅ Homeowner seeking roommate detected: '{keyword}'")
                return UserRole.HOMEOWNER_SEEKING_ROOMMATE, 0.90

        # Roommate seeker
        for keyword in self.roommate_seeker_keywords:
            if keyword in msg_lower:
                logger.info(f"✅ Roommate seeker detected: '{keyword}'")
                return UserRole.ROOMMATE_SEEKER, 0.90

        # Guest/renter signals
        if rental_model == RentalModel.AFRICAN_RENTAL:
            renter_signals = [
                "looking for a", "need a", "seeking", "want to rent",
                "im looking for", "show me", "what do you have", "available rooms"
            ]
            for signal in renter_signals:
                if signal in msg_lower:
                    logger.info(f"🔍 African renter signal: '{signal}'")
                    return UserRole.RENTER, 0.80

        if rental_model == RentalModel.AIRBNB:
            guest_signals = [
                "looking for accommodation", "need a place", "book",
                "where can i stay", "available places", "show me listings"
            ]
            for signal in guest_signals:
                if signal in msg_lower:
                    logger.info(f"🔍 Airbnb guest signal: '{signal}'")
                    return UserRole.AIRBNB_GUEST, 0.80

        logger.warning(f"⚠️ Could not determine user role from: {user_message}")
        return None, 0.0

    def validate_role_consistency(self, user_role: str, rental_model: RentalModel) -> bool:
        """Validate that role matches rental model"""

        valid_combinations = {
            RentalModel.AIRBNB: [UserRole.AIRBNB_HOST, UserRole.AIRBNB_GUEST],
            RentalModel.AFRICAN_RENTAL: [UserRole.LANDLORD, UserRole.RENTER, UserRole.TENANT],
            RentalModel.ROOMMATE: [UserRole.HOMEOWNER_SEEKING_ROOMMATE, UserRole.ROOMMATE_SEEKER],
            RentalModel.MIXED: [UserRole.LANDLORD, UserRole.RENTER, UserRole.TENANT,
                                UserRole.AIRBNB_HOST, UserRole.AIRBNB_GUEST,
                                UserRole.HOMEOWNER_SEEKING_ROOMMATE, UserRole.ROOMMATE_SEEKER],
        }

        valid = valid_combinations.get(rental_model, [])

        if user_role in valid:
            logger.info(f"✅ Role {user_role} valid for {rental_model.value}")
            return True

        logger.warning(f"⚠️ Role {user_role} may not match {rental_model.value}")
        return False


class RoleBasedInferenceEngine:
    """Adapt inference based on user role and rental model"""

    def __init__(self):
        self.role_detector = UserRoleDetector()
        logger.info("🧠 Role-based Inference Engine initialized")

    def infer_listing_type(self, state: Dict, user_message: str, rental_model: RentalModel = None) -> Tuple[str, float]:
        """
        Infer listing type based on user role and rental model

        Returns: (listing_type, confidence)
        """

        # Detect rental model
        if rental_model is None:
            rental_model = self.role_detector.detect_rental_model(user_message, state.get("location"))

        # Detect user role
        user_role, role_confidence = self.role_detector.detect_user_role(user_message, rental_model)

        logger.info(f"🔍 Rental Model: {rental_model.value}")
        logger.info(f"👤 User Role: {user_role} (confidence: {role_confidence:.0%})")

        # Store in state for later use
        state["rental_model"] = rental_model.value
        state["user_role"] = user_role

        # ==================== AIRBNB MODEL ====================

        if rental_model == RentalModel.AIRBNB:

            # Host listing = short-stay
            if user_role == UserRole.AIRBNB_HOST:
                logger.info("📍 Host → short-stay listing")
                return "short-stay", 0.98

            # Guest searching = just needs to search
            if user_role == UserRole.AIRBNB_GUEST:
                logger.info("📍 Guest → searching for short-stay")
                return "short-stay", 0.95

        # ==================== AFRICAN RENTAL MODEL ====================

        elif rental_model == RentalModel.AFRICAN_RENTAL:

            # Landlord listing = rent listing
            if user_role in [UserRole.LANDLORD, UserRole.OWNER]:
                logger.info("📍 Landlord → rent listing")
                return "rent", 0.98

            # Renter/tenant searching = rent listing
            if user_role in [UserRole.RENTER, UserRole.TENANT]:
                logger.info("📍 Tenant/Renter → searching for rent")
                return "rent", 0.95

        # ==================== ROOMMATE MODEL ====================

        elif rental_model == RentalModel.ROOMMATE:

            # Homeowner seeking roommate = roommate listing
            if user_role == UserRole.HOMEOWNER_SEEKING_ROOMMATE:
                logger.info("📍 Homeowner → roommate listing")
                return "roommate", 0.98

            # Roommate seeker = searching roommate
            if user_role == UserRole.ROOMMATE_SEEKER:
                logger.info("📍 Roommate seeker → searching for roommate")
                return "roommate", 0.95

        # ==================== SALE MODEL (both) ====================

        if user_role == UserRole.SELLER:
            logger.info("📍 Seller → sale listing")
            return "sale", 0.98

        if user_role == UserRole.BUYER:
            logger.info("📍 Buyer → searching for sale")
            return "sale", 0.95

        # Fallback: check explicit listing_type
        explicit_type = state.get("listing_type")
        if explicit_type:
            logger.info(f"📍 Using explicit listing_type: {explicit_type}")
            return explicit_type, 0.85

        logger.warning("⚠️ Could not infer listing_type, defaulting to rent")
        return "rent", 0.5

    def adapt_field_extraction(self, state: Dict, user_message: str) -> Dict:
        """
        Adapt field extraction based on user role and rental model
        """

        rental_model = self.role_detector.detect_rental_model(user_message, state.get("location"))
        user_role, _ = self.role_detector.detect_user_role(user_message, rental_model)

        extraction_config = {
            "rental_model": rental_model.value,
            "user_role": user_role,
            "required_fields": [],
            "price_type_suggestions": [],
            "amenity_focus": [],
            "validation_rules": []
        }

        # ==================== AIRBNB HOST ====================
        if user_role == UserRole.AIRBNB_HOST:
            extraction_config["required_fields"] = [
                "location", "bedrooms", "bathrooms", "price", "amenities"
            ]
            extraction_config["price_type_suggestions"] = ["nightly", "daily", "weekly"]
            extraction_config["amenity_focus"] = ["wifi", "parking", "pool", "kitchen", "ac"]
            extraction_config["validation_rules"] = [
                "price must be per night (nightly/daily)",
                "bedrooms minimum 1",
                "bathrooms can be shared"
            ]

        # ==================== AIRBNB GUEST ====================
        elif user_role == UserRole.AIRBNB_GUEST:
            extraction_config["required_fields"] = ["location", "check_in", "check_out"]
            extraction_config["price_type_suggestions"] = ["nightly"]
            extraction_config["amenity_focus"] = ["wifi", "kitchen", "parking"]
            extraction_config["validation_rules"] = [
                "check dates for availability",
                "show prices in nightly rates"
            ]

        # ==================== LANDLORD (African) ====================
        elif user_role == UserRole.LANDLORD:
            extraction_config["required_fields"] = [
                "location", "bedrooms", "bathrooms", "price", "price_type", "furnished"
            ]
            extraction_config["price_type_suggestions"] = ["monthly", "yearly"]
            extraction_config["amenity_focus"] = [
                "furnished", "kitchen", "water", "electricity", "security"
            ]
            extraction_config["validation_rules"] = [
                "price must be monthly or yearly",
                "specify if furnished/unfurnished",
                "include utility info if available",
                "bedrooms and bathrooms required"
            ]

        # ==================== RENTER/TENANT (African) ====================
        elif user_role in [UserRole.RENTER, UserRole.TENANT]:
            extraction_config["required_fields"] = [
                "location", "budget", "bedrooms", "price_type"
            ]
            extraction_config["price_type_suggestions"] = ["monthly", "yearly"]
            extraction_config["amenity_focus"] = [
                "furnished", "security", "water", "electricity", "parking"
            ]
            extraction_config["validation_rules"] = [
                "show monthly/yearly prices",
                "filter by budget",
                "highlight furnished options",
                "show security features"
            ]

        # ==================== HOMEOWNER SEEKING ROOMMATE ====================
        elif user_role == UserRole.HOMEOWNER_SEEKING_ROOMMATE:
            extraction_config["required_fields"] = [
                "location", "bedrooms_available", "bathrooms_available", "price", "price_type"
            ]
            extraction_config["price_type_suggestions"] = ["monthly", "yearly"]
            extraction_config["amenity_focus"] = [
                "furnished", "utilities_included", "kitchen_access", "laundry",
                "internet", "parking", "living_room_access"
            ]
            extraction_config["validation_rules"] = [
                "price must be monthly or yearly",
                "specify which rooms are available",
                "describe house/apartment condition",
                "list utilities included",
                "mention house rules"
            ]

        # ==================== ROOMMATE SEEKER ====================
        elif user_role == UserRole.ROOMMATE_SEEKER:
            extraction_config["required_fields"] = [
                "location", "budget", "move_in_date"
            ]
            extraction_config["price_type_suggestions"] = ["monthly", "yearly"]
            extraction_config["amenity_focus"] = [
                "furnished", "utilities_included", "kitchen_access", "internet",
                "parking", "proximity_to_work"
            ]
            extraction_config["validation_rules"] = [
                "show monthly/yearly prices",
                "filter by budget",
                "check roommate compatibility",
                "show lease terms"
            ]

        # ==================== SELLER ====================
        elif user_role == UserRole.SELLER:
            extraction_config["required_fields"] = [
                "location", "bedrooms", "bathrooms", "price", "property_type"
            ]
            extraction_config["price_type_suggestions"] = ["fixed"]
            extraction_config["amenity_focus"] = ["land size", "property type", "condition"]
            extraction_config["validation_rules"] = [
                "price is total sale price",
                "property type required (apartment, house, etc)",
                "include land/property size if known"
            ]

        # ==================== BUYER ====================
        elif user_role == UserRole.BUYER:
            extraction_config["required_fields"] = [
                "location", "budget", "bedrooms", "property_type"
            ]
            extraction_config["price_type_suggestions"] = []
            extraction_config["amenity_focus"] = ["property type", "land size", "condition"]
            extraction_config["validation_rules"] = [
                "show total sale prices",
                "filter by budget range",
                "group by property type"
            ]

        logger.info(f"✅ Extraction config adapted for {user_role}")

        return extraction_config

    def get_role_context_prompt(self, user_role: str, rental_model: str) -> str:
        """Get AI prompt context based on role"""

        prompts = {
            UserRole.AIRBNB_HOST: """
You are helping an Airbnb host list their property.
- Focus on: short-stay rental features, nightly rates, guest amenities
- Price type: nightly/daily/weekly
- Emphasize: WiFi, kitchen, parking, cleanliness
""",

            UserRole.AIRBNB_GUEST: """
You are helping someone find an Airbnb accommodation.
- Focus on: guest experience, amenities, location convenience
- Price type: show nightly rates
- Emphasize: cleanliness, safety, host responsiveness
""",

            UserRole.LANDLORD: """
You are helping an African landlord/property owner list a rental.
- Focus on: long-term rental (monthly/yearly), tenant features, property durability
- Price type: monthly or yearly
- Emphasize: furnished/unfurnished, utilities, security, maintenance
- Include: lease terms, deposit requirements
""",

            UserRole.RENTER: """
You are helping a tenant/renter find an apartment or room.
- Focus on: long-term rental suitability, affordability, amenities for living
- Price type: monthly or yearly budget
- Emphasize: security, utilities included, furnished options, commute
- Ask about: move-in date, lease length, budget
""",

            UserRole.TENANT: """
You are helping a tenant/renter find an apartment or room.
- Focus on: long-term rental suitability, affordability, amenities for living
- Price type: monthly or yearly budget
- Emphasize: security, utilities included, furnished options, commute
- Ask about: move-in date, lease length, budget
""",

            UserRole.SELLER: """
You are helping someone sell a property.
- Focus on: property value, unique features, condition, potential
- Price type: total sale price
- Emphasize: location, size, renovations, investment potential
- Include: property history, legal documents status
""",

| 523 |
+
UserRole.BUYER: """
|
| 524 |
+
You are helping someone find and purchase a property.
|
| 525 |
+
- Focus on: property value, investment potential, location
|
| 526 |
+
- Price type: show total purchase price
|
| 527 |
+
- Emphasize: property condition, neighborhood, future value
|
| 528 |
+
- Include: financing options, inspection recommendations
|
| 529 |
+
""",
|
| 530 |
+
|
| 531 |
+
UserRole.HOMEOWNER_SEEKING_ROOMMATE: """
|
| 532 |
+
You are helping someone find a roommate to share their home with.
|
| 533 |
+
- Focus on: compatibility, house/apartment details, shared spaces
|
| 534 |
+
- Price type: monthly or yearly
|
| 535 |
+
- Emphasize: house rules, utilities included, available rooms, amenities
|
| 536 |
+
- Include: lease terms, deposit, move-in date, roommate preferences
|
| 537 |
+
- Ask about: their lifestyle, work schedule, cleanliness standards
|
| 538 |
+
""",
|
| 539 |
+
|
| 540 |
+
UserRole.ROOMMATE_SEEKER: """
|
| 541 |
+
You are helping someone find a room to share with a roommate.
|
| 542 |
+
- Focus on: affordability, roommate compatibility, location, utilities
|
| 543 |
+
- Price type: monthly or yearly budget
|
| 544 |
+
- Emphasize: house rules, amenities, commute, lifestyle fit
|
| 545 |
+
- Include: move-in date, lease length, deposit requirements
|
| 546 |
+
- Ask about: budget, preferred location, work/study location, lifestyle
|
| 547 |
+
"""
|
| 548 |
+
}
|
| 549 |
+
|
| 550 |
+
return prompts.get(user_role, "")
|
| 551 |
+
|
| 552 |
+
|
| 553 |
+
# ==================== EXAMPLE USAGE ====================
|
| 554 |
+
|
| 555 |
+
if __name__ == "__main__":
|
| 556 |
+
logging.basicConfig(level=logging.INFO)
|
| 557 |
+
|
| 558 |
+
engine = RoleBasedInferenceEngine()
|
| 559 |
+
|
| 560 |
+
# Test cases
|
| 561 |
+
test_cases = [
|
| 562 |
+
# Airbnb host
|
| 563 |
+
("I'm a host on Airbnb and want to list my apartment in Lagos", "Lagos"),
|
| 564 |
+
|
| 565 |
+
# Airbnb guest
|
| 566 |
+
("I'm looking for accommodation on Airbnb in Accra next week", "Accra"),
|
| 567 |
+
|
| 568 |
+
# African landlord
|
| 569 |
+
("I'm a landlord in Cotonou with a 2-bedroom apartment for monthly rent", "Cotonou"),
|
| 570 |
+
|
| 571 |
+
# African tenant
|
| 572 |
+
("I'm looking to rent a furnished room in Nairobi, my budget is 30000 KES per month", "Nairobi"),
|
| 573 |
+
|
| 574 |
+
# Homeowner seeking roommate
|
| 575 |
+
("My house in Lagos is too big for just me. I have 2 extra bedrooms and want to share", "Lagos"),
|
| 576 |
+
|
| 577 |
+
# Roommate seeker
|
| 578 |
+
("I'm looking for a room to share in Accra, somewhere near my workplace", "Accra"),
|
| 579 |
+
|
| 580 |
+
# Seller
|
| 581 |
+
("I want to sell my house in Lagos for 50 million NGN", "Lagos"),
|
| 582 |
+
|
| 583 |
+
# Buyer
|
| 584 |
+
("I'm looking to buy a 3-bedroom apartment in Cape Town", "Cape Town"),
|
| 585 |
+
]
|
| 586 |
+
|
| 587 |
+
print("\n" + "="*70)
|
| 588 |
+
print("🧠 ROLE-BASED INFERENCE ENGINE TEST")
|
| 589 |
+
print("="*70 + "\n")
|
| 590 |
+
|
| 591 |
+
for message, location in test_cases:
|
| 592 |
+
print(f"📝 Message: {message}")
|
| 593 |
+
print(f"📍 Location: {location}\n")
|
| 594 |
+
|
| 595 |
+
state = {"location": location}
|
| 596 |
+
listing_type, confidence = engine.infer_listing_type(state, message)
|
| 597 |
+
|
| 598 |
+
print(f"✅ Listing Type: {listing_type} (confidence: {confidence:.0%})")
|
| 599 |
+
|
| 600 |
+
config = engine.adapt_field_extraction(state, message)
|
| 601 |
+
print(f"📋 Required fields: {', '.join(config['required_fields'])}")
|
| 602 |
+
print(f"💰 Price types: {', '.join(config['price_type_suggestions'])}")
|
| 603 |
+
|
| 604 |
+
prompt = engine.get_role_context_prompt(config['user_role'], config['rental_model'])
|
| 605 |
+
print(f"🎯 AI Context:\n{prompt}")
|
| 606 |
+
|
| 607 |
+
print("-" * 70 + "\n")
|
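A minimal usage sketch (not part of this commit): the extraction_config returned by adapt_field_extraction can serve as a completeness gate before a listing is published. It assumes RoleBasedInferenceEngine is importable from the file above; the draft dict and the missing-field check are illustrative assumptions, not repo code.

    engine = RoleBasedInferenceEngine()
    state = {"location": "Cotonou"}
    message = "I'm a landlord in Cotonou with a 2-bedroom apartment for monthly rent"
    config = engine.adapt_field_extraction(state, message)

    # Hypothetical partially-filled listing draft
    draft = {"location": "Cotonou", "bedrooms": 2, "price": 80000}

    # Any required field the draft does not yet contain still needs to be collected
    missing = [f for f in config["required_fields"] if f not in draft]
    if missing:
        print(f"Still required for this role: {', '.join(missing)}")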
app/ml/trainning/__init__.py
CHANGED
@@ -0,0 +1 @@
"""ML training module"""
app/ml/trainning/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (223 Bytes).
app/ml/trainning/__pycache__/hf_dataset_downloader.cpython-313.pyc
ADDED
Binary file (11.6 kB).
app/ml/trainning/__pycache__/train_enhanced_model.cpython-313.pyc
ADDED
Binary file (7.45 kB).
app/ml/trainning/hf_dataset_downloader.py
ADDED
@@ -0,0 +1,179 @@
#!/usr/bin/env python3
"""
hf_dataset_downloader.py – bullet-proof, real-tabular real-estate data
Run: python scripts/download_training_data.py
"""

import os, json, logging, pandas as pd, numpy as np
from typing import List, Tuple, Optional

logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(message)s")
log = logging.getLogger(__name__)

CACHE = "./hf_datasets"
OUT = "app/ml/models"

# ------------------------------------------------ one working source ---
SOURCES: List[Tuple[str, str, str]] = [
    ("california_housing", "csv", "https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv"),
]

COL_MAP = {
    "price": ["price", "median_house_value", "sale_price", "cost"],
    "bedrooms": ["bedrooms", "total_bedrooms", "beds"],
    "bathrooms": ["bathrooms", "total_bathrooms", "baths"],
    "location": ["location", "ocean_proximity", "city", "address", "region"],
}

# -----------------------------------------------------------------------
class HuggingFaceDatasetDownloader:
    def __init__(self, cache_dir: str = CACHE, output_dir: str = OUT):
        self.cache_dir = cache_dir
        self.output_dir = output_dir
        os.makedirs(cache_dir, exist_ok=True)
        os.makedirs(output_dir, exist_ok=True)

    # ------------ fetch -------------------------------------------------
    def _get_csv(self, url: str) -> Optional[pd.DataFrame]:
        try:
            df = pd.read_csv(url)
            log.info("CSV rows=%d url=%.60s", len(df), url)
            return df
        except Exception as e:
            log.error("CSV fail: %s", e)
            return None

    # ------------ normalise ---------------------------------------------
    def _normalise(self, df: pd.DataFrame, name: str) -> Optional[pd.DataFrame]:
        log.info("Normalising %s …", name)
        print(f"\n=== {name} COLUMNS ===\n{list(df.columns)}\n{df.head(2)}")

        out = pd.DataFrame()
        for std, variants in COL_MAP.items():
            for v in variants:
                if v in df.columns:
                    out[std] = df[v]
                    log.info("  mapped %s → %s", v, std)
                    break

        # price is mandatory
        if out.get("price") is None:
            log.warning("No price column in %s", name)
            return None
        out["price"] = pd.to_numeric(out["price"], errors="coerce").dropna()
        if out["price"].empty:
            log.warning("Price column empty after coerce")
            return None

        # safe fall-backs (Series, not scalars)
        if "location" not in out.columns:
            out["location"] = "Unknown"
        out["location"] = out["location"].fillna("Unknown")

        # bedrooms / bathrooms – create Series first, then fillna
        bedrooms_series = pd.to_numeric(out.get("bedrooms", 2), errors="coerce")
        bathrooms_series = pd.to_numeric(out.get("bathrooms", 1), errors="coerce")

        out["bedrooms"] = bedrooms_series.fillna(2).astype(int)
        out["bathrooms"] = bathrooms_series.fillna(1).astype(int)

        out["dataset_source"] = name
        out = out.dropna(subset=["price"])
        log.info("Normalised → %d rows", len(out))
        return out

    # ------------ combine ----------------------------------------------
    def _combine(self, frames: List[Tuple[str, pd.DataFrame]]) -> pd.DataFrame:
        cleaned = [self._normalise(df, name) for name, df in frames if df is not None]
        cleaned = [c for c in cleaned if c is not None and len(c)]
        if not cleaned:
            log.warning("No valid frames – creating minimal dummy so training can run")
            dummy = pd.DataFrame({
                "price": [250_000, 350_000, 450_000],
                "bedrooms": [2, 3, 4],
                "bathrooms": [1, 2, 3],
                "location": ["Unknown", "Unknown", "Unknown"],
                "dataset_source": ["dummy"] * 3,
            })
            return dummy
        final = pd.concat(cleaned, ignore_index=True)
        log.info("Combined → %d rows", len(final))
        return final

    # ------------ augment ----------------------------------------------
    def _augment(self, df: pd.DataFrame) -> pd.DataFrame:
        log.info("Augmenting …")
        df["price_per_bedroom"] = df["price"] / df["bedrooms"].clip(1)
        df["price_per_bathroom"] = df["price"] / df["bathrooms"].clip(1)
        df["total_rooms"] = df["bedrooms"] + df["bathrooms"]
        df["price_range"] = pd.cut(
            df["price"],
            bins=[0, 200_000, 500_000, 1_000_000, np.inf],
            labels=["low", "mid", "high", "luxury"],
        )
        return df

    # ------------ save --------------------------------------------------
    def _save(self, df: pd.DataFrame, basename: str = "combined_training_data"):
        csv = f"{self.output_dir}/{basename}.csv"
        parq = f"{self.output_dir}/{basename}.parquet"
        jsnl = f"{self.output_dir}/{basename}.jsonl"

        df.to_csv(csv, index=False)
        df.to_parquet(parq, index=False)
        with open(jsnl, "w", encoding="utf-8") as f:
            for rec in df.to_dict(orient="records"):
                f.write(json.dumps(rec, default=str) + "\n")

        log.info("Saved CSV  : %s", csv)
        log.info("Saved Parq : %s", parq)
        log.info("Saved JSONL: %s", jsnl)
        return {"csv": csv, "parquet": parq, "jsonl": jsnl}

    # ------------ stats -------------------------------------------------
    def _save_info(self, df: pd.DataFrame):
        info = {
            "rows": len(df),
            "cols": list(df.columns),
            "price": {"mean": float(df["price"].mean()), "median": float(df["price"].median()),
                      "min": float(df["price"].min()), "max": float(df["price"].max())},
            "bedrooms": {"mean": float(df["bedrooms"].mean()), "min": int(df["bedrooms"].min()), "max": int(df["bedrooms"].max())},
            "bathrooms": {"mean": float(df["bathrooms"].mean()), "min": int(df["bathrooms"].min()), "max": int(df["bathrooms"].max())},
            "locations": df["location"].nunique(),
        }
        path = f"{self.output_dir}/dataset_info.json"
        with open(path, "w") as f:
            json.dump(info, f, indent=2)
        log.info("Stats → %s", path)

    # ------------ main --------------------------------------------------
    def download_and_prepare(self) -> pd.DataFrame:
        print("\n" + "=" * 80)
        print("REAL-ESTATE TABULAR DOWNLOADER (BULLET-PROOF)")
        print("=" * 80 + "\n")

        raw: List[Tuple[str, pd.DataFrame]] = []
        for name, typ, src in SOURCES:
            log.info("Getting %s …", name)
            if typ == "csv":
                raw.append((name, self._get_csv(src)))
            else:
                log.warning("Unknown type %s", typ)

        raw = [(n, d) for n, d in raw if d is not None]
        combined = self._combine(raw)
        combined = self._augment(combined)
        paths = self._save(combined)
        self._save_info(combined)

        print("\n" + "=" * 80)
        print("DOWNLOAD COMPLETE")
        print("=" * 80)
        print(f"Rows : {len(combined):,}")
        print(f"Files: {paths}")
        print("\nNext → python scripts/train_models.py\n")
        return combined


if __name__ == "__main__":
    HuggingFaceDatasetDownloader().download_and_prepare()
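A quick sanity check (not part of this commit) on the downloader's output, assuming it has been run and app/ml/models/combined_training_data.parquet exists; price_range and dataset_source are the columns added by _augment() and _normalise() above.

    import pandas as pd

    df = pd.read_parquet("app/ml/models/combined_training_data.parquet")
    print(df["price_range"].value_counts())                # low / mid / high / luxury buckets from _augment()
    print(df.groupby("dataset_source")["price"].median())  # one entry per source listed in SOURCES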
app/ml/trainning/train_enhanced_model.py
ADDED
@@ -0,0 +1,123 @@
#!/usr/bin/env python3
"""
train_enhanced_model.py – train on real tabular data
Run: python scripts/train_models.py
"""

import json, os, joblib, logging, pandas as pd, numpy as np
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, accuracy_score
from sentence_transformers import SentenceTransformer

logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(message)s")
log = logging.getLogger(__name__)

MODEL_DIR = "app/ml/models"
DATA_FILE = f"{MODEL_DIR}/combined_training_data.parquet"
MODEL_PATH = f"{MODEL_DIR}/field_models.pkl"

class EnhancedModelTrainer:
    def __init__(self, model_dir: str = MODEL_DIR, data_file: str = DATA_FILE):
        self.model_dir = model_dir
        self.data_file = data_file
        os.makedirs(model_dir, exist_ok=True)

    # ------------ load ----------------------------------------------
    def _load(self) -> pd.DataFrame | None:
        if not os.path.exists(self.data_file):
            log.error("Data not found → run download_training_data.py first")
            return None
        df = pd.read_parquet(self.data_file)
        log.info("Loaded %d rows", len(df))
        return df

    # ------------ prep ----------------------------------------------
    def _prep(self, df: pd.DataFrame):
        log.info("Preparing features …")
        X_num = df[["bedrooms", "bathrooms", "price_per_bedroom", "total_rooms"]].fillna(0)
        self.scaler = StandardScaler().fit(X_num)

        # embeddings from location string
        self.embedder = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="./models")
        X_text = self.embedder.encode(df["location"].astype(str).tolist())

        X = np.hstack([self.scaler.transform(X_num), X_text])
        y_price = df["price"].values
        y_loc = LabelEncoder().fit_transform(df["location"])
        return X, y_price, y_loc

    # ------------ train ---------------------------------------------
    def _train(self, X, y_price, y_loc, df: pd.DataFrame):
        log.info("Training models …")
        X_train, X_test, yp_train, yp_test, yl_train, yl_test = train_test_split(
            X, y_price, y_loc, test_size=0.2, random_state=42
        )

        # 1. price regressor
        price_model = RandomForestRegressor(
            n_estimators=300, max_depth=20, random_state=42, n_jobs=-1
        )
        price_model.fit(X_train, yp_train)
        y_pred = price_model.predict(X_test)
        log.info("Price R² = %.3f  MAE = $%.0f", r2_score(yp_test, y_pred), mean_absolute_error(yp_test, y_pred))

        # 2. location classifier
        loc_model = RandomForestClassifier(
            n_estimators=300, max_depth=None, random_state=42, n_jobs=-1
        )
        loc_model.fit(X_train, yl_train)
        log.info("Location accuracy = %.2f%%", 100 * accuracy_score(yl_test, loc_model.predict(X_test)))

        # 3. price patterns
        patterns = (
            df.groupby("location")["price"]
            .agg(["mean", "median", "std", "min", "max", "count"])
            .round(0)
            .to_dict(orient="index")
        )

        location_encoder = LabelEncoder().fit(df["location"])
        return {
            "price_model": price_model,
            "location_model": loc_model,
            "location_encoder": location_encoder,
            "price_patterns": patterns,
            "scaler": self.scaler,
            "embedder": self.embedder,
        }

    # ------------ save ----------------------------------------------
    def _save(self, bundle):
        joblib.dump(bundle, MODEL_PATH)
        log.info("Saved model bundle → %s", MODEL_PATH)

    # ------------ report --------------------------------------------
    def _report(self, df: pd.DataFrame):
        print("\n📊 TRAINING REPORT")
        print(f"Rows trained : {len(df):,}")
        print(f"Locations    : {df['location'].nunique()}")
        print(f"Avg price    : ${df['price'].mean():,.0f}")
        print(f"Price range  : ${df['price'].min():,.0f} – ${df['price'].max():,.0f}")
        print(f"Models saved : {MODEL_PATH}\n")

    # ------------ pipeline ------------------------------------------
    def train(self):
        print("\n" + "=" * 70)
        print("🚀 ENHANCED MODEL TRAINING (REAL DATA)")
        print("=" * 70 + "\n")

        df = self._load()
        if df is None:
            return
        X, y_price, y_loc = self._prep(df)
        bundle = self._train(X, y_price, y_loc, df)
        self._save(bundle)
        self._report(df)

        print("✅ Training complete – run test_ml_model.py to verify\n")


if __name__ == "__main__":
    EnhancedModelTrainer().train()
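A minimal inference sketch (not part of this commit) showing how the field_models.pkl bundle saved above could be loaded for a rough price estimate. The numeric feature order must match _prep() — bedrooms, bathrooms, price_per_bedroom, total_rooms; the 3-bed/2-bath Lagos example and the zero placeholder for price_per_bedroom are assumptions.

    import joblib
    import numpy as np

    bundle = joblib.load("app/ml/models/field_models.pkl")

    # Scale the numeric features with the scaler fitted during training
    num = bundle["scaler"].transform([[3, 2, 0, 5]])
    # Embed the location string with the same sentence-transformers model used in _prep()
    text = bundle["embedder"].encode(["Lagos"])
    X = np.hstack([num, text])

    print(f"Estimated price: {bundle['price_model'].predict(X)[0]:,.0f}")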
models/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/added_tokens.json
ADDED
File without changes
models/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/chat_template.jinja
ADDED
File without changes
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db
size 90868376
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/58d4a9a45664eb9e12de9549c548c09b6134c17f
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7dfc82496ec33f906b5b0d6750c1e2397da6530c74d1ae3568c55bc2739125e7
size 10454
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/59d594003bf59880a884c574bf88ef7555bb0202
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fc1993fde0a95c24ec6c022539d41cf6e2f7c9721e5415d6fb6897472a9cd4b7
size 53
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/72b987fd805cfa2b58c4c8c952b274a11bfd5a00
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:953f9c0d463486b10a6871cc2fd59f223b2c70184f49815e7efbcab5d8908b41
size 612
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/952a9b81c0bfd99800fabf352f69c7ccd46c5e43
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:84e40c8e006c9b1d6c122e02cba9b02458120b5fb0c87b746c41e0207cf642cf
size 349
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/c79f2b6a0cea6f4b564fed1938984bace9d30ff0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:acb92769e8195aabd29b7b2137a9e6d6e25c476a4f15aa4355c233426c61576b
size 350
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/cb202bfe2e3c98645018a6d12f182a434c9d3e02
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:be50c3628f2bf5bb5e3a7f17b1f74611b2561a3a27eeab05e5aa30f411572037
size 466247
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/d1514c3162bbe87b343f565fadc62e6c06f04f03
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4be450dde3b0273bb9787637cfbd28fe04a7ba6ab9d36ac48e92b11e350ffc23
size 190
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:303df45a03609e4ead04bc3dc1536d0ab19b5358db685b6f3da123d05ec200e3
size 112
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fb140275c155a9c7c5a3b3e0e77a9e839594a938
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:07eced375cec144d27c900241f3e339478dec958f92fddbc551f295c992038a3
size 231508
models/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fd1b291129c607e5d49799f87cb219b27f98acdf
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:061ca9d39661d6c6d6de5ba27f79a1cd5770ea247f8d46412a68a498dc5ac9f3
size 116
models/models--sentence-transformers--all-MiniLM-L6-v2/refs/main
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:edb692c300dcf5dd2a56116221d7613e9d360b6f0be22bc3e02e040e095b135b
size 40
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json
ADDED
@@ -0,0 +1 @@
../../../blobs/d1514c3162bbe87b343f565fadc62e6c06f04f03
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/README.md
ADDED
@@ -0,0 +1 @@
../../blobs/58d4a9a45664eb9e12de9549c548c09b6134c17f
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config.json
ADDED
@@ -0,0 +1 @@
../../blobs/72b987fd805cfa2b58c4c8c952b274a11bfd5a00
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config_sentence_transformers.json
ADDED
@@ -0,0 +1 @@
../../blobs/fd1b291129c607e5d49799f87cb219b27f98acdf
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/model.safetensors
ADDED
@@ -0,0 +1 @@
../../blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json
ADDED
@@ -0,0 +1 @@
../../blobs/952a9b81c0bfd99800fabf352f69c7ccd46c5e43
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/sentence_bert_config.json
ADDED
@@ -0,0 +1 @@
../../blobs/59d594003bf59880a884c574bf88ef7555bb0202
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
../../blobs/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer.json
ADDED
@@ -0,0 +1 @@
../../blobs/cb202bfe2e3c98645018a6d12f182a434c9d3e02
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
../../blobs/c79f2b6a0cea6f4b564fed1938984bace9d30ff0
models/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/vocab.txt
ADDED
@@ -0,0 +1 @@
../../blobs/fb140275c155a9c7c5a3b3e0e77a9e839594a938
scripts/download_training_data.py
ADDED
@@ -0,0 +1,5 @@
#!/usr/bin/env python3
import sys, os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.ml.trainning.hf_dataset_downloader import HuggingFaceDatasetDownloader
HuggingFaceDatasetDownloader().download_and_prepare()
scripts/train_models.py
ADDED
@@ -0,0 +1,5 @@
#!/usr/bin/env python3
import sys, os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.ml.trainning.train_enhanced_model import EnhancedModelTrainer
EnhancedModelTrainer().train()