paivalucass committed on
Commit
b510e43
·
1 Parent(s): 624bf31

update llm model to qwen 2

Browse files
Files changed (2) hide show
  1. README.md +5 -5
  2. app/main.py +42 -61
README.md CHANGED
@@ -12,9 +12,9 @@ short_description: LLM based Restaurant Recommendation chat bot.
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
13
 
14
 
15
- # AI Restaurant Recommendation Chatbot (LLAMA 3.2) 🍜
16
 
17
- This project implements an intelligent restaurant recommendation system using OpenStreetMap, sentence-transformer embeddings, FAISS vector search, and an LLM assistant (Llama 3.2 Instruct).
18
  It provides two main features:
19
 
20
  1. **Embedding-based restaurant recommendations**
@@ -68,7 +68,7 @@ sentence-transformers/all-MiniLM-L6-v2
68
 
69
  - The selected restaurants are fed into an LLM prompt.
70
 
71
- - Llama-3.2-1B-Instruct generates a human-friendly explanation.
72
 
73
  ## 2. Technologies Used
74
  #### **Core**
@@ -119,7 +119,7 @@ The endpoint:
119
 
120
  2. Creates a structured chat template.
121
 
122
- 3. Uses Llama-3.2-1B-Instruct.
123
 
124
  4. Returns a natural-language text response recommending the top 4 best restaurants for the user's query.
125
 
@@ -137,7 +137,7 @@ The endpoint:
137
 
138
  ### LLM Model
139
 
140
- - **meta-llama/Llama-3.2-1B-Instruct**
141
 
142
  - Runs on CPU in this project
143
 
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
13
 
14
 
15
+ # AI Restaurant Recommendation Chatbot (Qwen2-1.5B-Instruct) 🍜
16
 
17
+ This project implements an intelligent restaurant recommendation system using OpenStreetMap, sentence-transformer embeddings, FAISS vector search, and an LLM assistant (Qwen2-1.5B-Instruct).
18
  It provides two main features:
19
 
20
  1. **Embedding-based restaurant recommendations**
 
68
 
69
  - The selected restaurants are fed into an LLM prompt.
70
 
71
+ - Qwen2-1.5B-Instruct generates a human-friendly explanation.
72
 
73
  ## 2. Technologies Used
74
  #### **Core**
 
119
 
120
  2. Creates a structured chat template.
121
 
122
+ 3. Uses Qwen2-1.5B-Instruct.
123
 
124
  4. Returns a natural-language text response recommending the top 3 restaurants for the user's query.
125
 
 
137
 
138
  ### LLM Model
139
 
140
+ - **Qwen/Qwen2-1.5B-Instruct**
141
 
142
  - Runs on CPU in this project
143
 
app/main.py CHANGED
@@ -8,7 +8,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
8
  import torch
9
 
10
  # Name of the local Llama model used for generating natural language responses.
11
- MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
12
 
13
  # Create the FastAPI application.
14
  app = FastAPI(title="Restaurant Recommender GenAI API")
@@ -53,76 +53,57 @@ def recommend(query: str, user_lat: float, user_lon: float, radius: float = 100.
53
  @app.get("/chat")
54
  def recommend_llm(query: str, user_lat: float, user_lon: float, radius: float = 100.0, k: int = 20):
55
 
56
- # Retrieve restaurants from Overpass + embedding ranking.
57
  results = recommender.recommend(query, user_lat, user_lon, radius, k)
58
  if not results:
59
  return {"response": "Sorry, I couldn't find any restaurants nearby."}
60
 
61
- # Construct the message format expected by Llama 3's chat template.
62
- # The system message instructs the model to respond clearly and without Markdown.
63
- # The user message includes the query and the list of nearby restaurants.
64
- messages = [
65
- {
66
- "role": "system",
67
- "content": (
68
- "You are a restaurant recommendation assistant.\n"
69
- "Rules:\n"
70
- "- Do not use Markdown.\n"
71
- "- Use simple, natural phrasing.\n"
72
- "- Output plain text only."
73
- )
74
- },
75
- {
76
- "role": "user",
77
- "content": (
78
- f"I want food like: '{query}'.\n\n"
79
- "Here are nearby restaurants:\n" +
80
- "\n".join([
81
- f"- {r['name']} ({r['cuisine']}), {r['distance_km']:.1f} km"
82
- for r in results
83
- ]) +
84
- "\n\nSelect the best 5 options and explain the reason for each choice. "
85
- "Do not invent information."
86
- )
87
- }
88
- ]
89
-
90
- # Convert the messages to model-ready tensors using Llama's chat template.
91
- encoded = tokenizer.apply_chat_template(
92
- messages,
93
- return_tensors="pt",
94
- add_generation_prompt=True,
95
- padding=True
96
- )
97
 
98
- input_ids = encoded.to(device)
99
- attention_mask = (input_ids != tokenizer.pad_token_id).long().to(device)
 
 
 
 
 
 
100
 
101
- # Generate the assistant's response.
102
  outputs = model.generate(
103
- input_ids=input_ids,
104
- attention_mask=attention_mask,
105
- max_new_tokens=200,
106
- temperature=0.7,
107
- do_sample=True,
108
  )
109
 
110
- # Decode the raw text returned by the model.
111
  response_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
112
 
113
- # The generated text includes the model's internal chat headers.
114
- # This extracts only the final assistant message.
115
- assistant_tag = "<|start_header_id|>assistant<|end_header_id|>"
116
- start = response_text.rfind(assistant_tag)
117
-
118
- if start != -1:
119
- # Keep only the section after the assistant header.
120
- answer = response_text[start + len(assistant_tag):].strip()
121
- else:
122
- # Fallback: strip special tokens.
123
- answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
124
 
125
- # Remove the end-of-turn token if present.
126
- answer = answer.replace("<|eot_id|>", "").strip()
127
 
128
- return {"response": answer}
 
8
  import torch
9
 
10
  # Name of the local Llama model used for generating natural language responses.
11
+ MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
12
 
13
  # Create the FastAPI application.
14
  app = FastAPI(title="Restaurant Recommender GenAI API")
 
53
  @app.get("/chat")
54
  def recommend_llm(query: str, user_lat: float, user_lon: float, radius: float = 100.0, k: int = 20):
55
 
 
56
  results = recommender.recommend(query, user_lat, user_lon, radius, k)
57
  if not results:
58
  return {"response": "Sorry, I couldn't find any restaurants nearby."}
59
 
60
+ # Build list of restaurants
61
+ restaurant_list = "\n".join([
62
+ f"- {r['name']} ({r['cuisine']}), {r['distance_km']:.1f} km"
63
+ for r in results
64
+ ])
65
+
66
+ # Qwen-style structured chat prompt
67
+ prompt = f"""
68
+ <|im_start|>system
69
+ You are a restaurant recommendation assistant.
70
+ Rules:
71
+ - Do not use Markdown.
72
+ - Do not invent information.
73
+ - Use simple natural language.
74
+ - Only use the details given to you.
75
+ - Do not create details that are not given to you.
76
+ <|im_end|>
77
+
78
+ <|im_start|>user
79
+ I want food like: '{query}'.
80
+
81
+ Here are nearby restaurants:
82
+ {restaurant_list}
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
+ Select the best 3 options and explain why each was chosen.
85
+ Do not add details that are not included above.
86
+ <|im_end|>
87
+
88
+ <|im_start|>assistant
89
+ """
90
+
91
+ inputs = tokenizer(prompt, return_tensors="pt").to(device)
92
 
 
93
  outputs = model.generate(
94
+ **inputs,
95
+ max_new_tokens=350,
96
+ temperature=0.3,
97
+ do_sample=True
 
98
  )
99
 
 
100
  response_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
101
 
102
+ # Extract only the final assistant response
103
+ if "<|im_start|>assistant" in response_text:
104
+ response_text = response_text.split("<|im_start|>assistant")[-1].strip()
 
 
 
 
 
 
 
 
105
 
106
+ if "<|im_end|>" in response_text:
107
+ response_text = response_text.split("<|im_end|>")[0].strip()
108
 
109
+ return {"response": response_text}