Spaces: Running on Zero

Commit 100d2c7 · jedick committed · 1 Parent(s): 03db0de

Rename compute_location to compute_mode

Changed files:
- app.py +26 -26
- eval.py +8 -8
- graph.py +5 -5
- index.py +5 -5
- main.py +19 -19
- prompts.py +2 -2
- retriever.py +13 -13
- util.py +2 -2
app.py
CHANGED
@@ -14,7 +14,7 @@ import uuid
 import ast
 import os
 
-# Global settings for compute_location and search_type
+# Global settings for compute_mode and search_type
 COMPUTE = "cloud"
 search_type = "hybrid"
 
@@ -31,7 +31,7 @@ graph_cloud = None
 def run_workflow(input, history, thread_id):
     """The main function to run the chat workflow"""
 
-    # Get global graph for compute location
+    # Get global graph for compute mode
     global graph_edge, graph_cloud
     if COMPUTE == "edge":
         graph = graph_edge
@@ -52,7 +52,7 @@ def run_workflow(input, history, thread_id):
     # Compile the graph with an in-memory checkpointer
     memory = MemorySaver()
     graph = graph_builder.compile(checkpointer=memory)
-    # Set global graph for compute location
+    # Set global graph for compute mode
     if COMPUTE == "edge":
         graph_edge = graph
     if COMPUTE == "cloud":
@@ -213,14 +213,14 @@ with gr.Blocks(
     # Define components
     # -----------------
 
-    compute_location = gr.Radio(
+    compute_mode = gr.Radio(
         choices=[
             "cloud",
            "edge" if torch.cuda.is_available() else "edge (not available)",
         ],
         value=COMPUTE,
-        label="Compute Location",
-        info=(None if torch.cuda.is_available() else "NOTE: edge
+        label="Compute Mode",
+        info=(None if torch.cuda.is_available() else "NOTE: edge mode requires GPU"),
         interactive=torch.cuda.is_available(),
         render=False,
     )
@@ -256,7 +256,7 @@ with gr.Blocks(
             None,
             (
                 "images/cloud.png"
-                if compute_location.value == "cloud"
+                if compute_mode.value == "cloud"
                 else "images/chip.png"
             ),
         ),
@@ -293,22 +293,22 @@ with gr.Blocks(
     **Chat with the [R-help mailing list archives]((https://stat.ethz.ch/pipermail/r-help/)).**
     An LLM turns your question into a search query, including year ranges, and generates an answer from the retrieved emails.
     You can ask follow-up questions with the chat history as context.
-    ➡️ To clear the history and start a new chat, press the 🗑️ trash button
-    **_Answers may be incorrect._
+    ➡️ To clear the history and start a new chat, press the 🗑️ trash button.
+    **_Answers may be incorrect._**
     """
         return intro
 
-    def get_status_text(compute_location):
-        if compute_location.startswith("cloud"):
+    def get_status_text(compute_mode):
+        if compute_mode.startswith("cloud"):
            status_text = f"""
-            📍
+            📍 Now in **cloud** mode, using the OpenAI API<br>
             ✨ text-embedding-3-small and {openai_model}<br>
             ⚠️ **_Privacy Notice_**: Data sharing with OpenAI is enabled<br>
             🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
             """
-        if compute_location.startswith("edge"):
+        if compute_mode.startswith("edge"):
            status_text = f"""
-            📍
+            📍 Now in **edge** mode, using ZeroGPU hardware<br>
             ✨ Embeddings: [Nomic](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5); LLM: [{model_id}](https://huggingface.co/{model_id})<br>
             🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
             """
@@ -327,7 +327,7 @@ with gr.Blocks(
        info_text = f"""
     **Database:** {len(sources)} emails from {start} to {end}.
     **Features:** RAG, today's date, hybrid search (dense+sparse), query analysis,
-    multiple tool calls (cloud
+    multiple tool calls (cloud mode), answer with citations (cloud mode), chat memory.
     **Tech:** LangChain + Hugging Face + Gradio; ChromaDB and [BM25S](https://github.com/xhluca/bm25s)-based retrievers.<br>
     """
         return info_text
@@ -339,7 +339,7 @@ with gr.Blocks(
         with gr.Column(scale=2):
             intro = gr.Markdown(get_intro_text())
         with gr.Column(scale=1):
-            compute_location.render()
+            compute_mode.render()
             chat_interface = gr.ChatInterface(
                 to_workflow,
                 chatbot=chatbot,
@@ -359,7 +359,7 @@ with gr.Blocks(
             )
         # Right column: Info, Examples, Citations
         with gr.Column(scale=1):
-            status = gr.Markdown(get_status_text(compute_location.value))
+            status = gr.Markdown(get_status_text(compute_mode.value))
             with gr.Accordion("ℹ️ More Info", open=False):
                 info = gr.Markdown(get_info_text())
             with gr.Accordion("💡 Examples", open=True):
@@ -408,14 +408,14 @@ with gr.Blocks(
         """Return updated value for a component"""
         return gr.update(value=value)
 
-    def set_compute(compute_location):
+    def set_compute(compute_mode):
         global COMPUTE
-        COMPUTE = compute_location
+        COMPUTE = compute_mode
 
-    def set_avatar(compute_location):
-        if compute_location.startswith("cloud"):
+    def set_avatar(compute_mode):
+        if compute_mode.startswith("cloud"):
             image_file = "images/cloud.png"
-        if compute_location.startswith("edge"):
+        if compute_mode.startswith("edge"):
             image_file = "images/chip.png"
         return gr.update(
             avatar_images=(
@@ -458,15 +458,15 @@ with gr.Blocks(
         """Return cleared component"""
         return component.clear()
 
-    compute_location.change(
+    compute_mode.change(
         # Update global COMPUTE variable
         set_compute,
-        [compute_location],
+        [compute_mode],
         api_name=False,
     ).then(
         # Change the app status text
         get_status_text,
-        [compute_location],
+        [compute_mode],
         [status],
         api_name=False,
     ).then(
@@ -478,7 +478,7 @@ with gr.Blocks(
     ).then(
         # Change the chatbot avatar
         set_avatar,
-        [compute_location],
+        [compute_mode],
         [chatbot],
         api_name=False,
     ).then(
eval.py
CHANGED
@@ -34,7 +34,7 @@ def load_queries_and_references(csv_path):
     return queries, references
 
 
-def build_eval_dataset(queries, references, compute_location, workflow, search_type):
+def build_eval_dataset(queries, references, compute_mode, workflow, search_type):
     """Build dataset for evaluation"""
     dataset = []
     for query, reference in zip(queries, references):
@@ -42,15 +42,15 @@ def build_eval_dataset(queries, references, compute_location, workflow, search_type):
         if workflow == "chain":
             print("\n\n--- Query ---")
             print(query)
-            response = RunChain(query, compute_location, search_type)
+            response = RunChain(query, compute_mode, search_type)
             print("--- Response ---")
             print(response)
             # Retrieve context documents for a query
-            retriever = BuildRetriever(compute_location, search_type)
+            retriever = BuildRetriever(compute_mode, search_type)
             docs = retriever.invoke(query)
             retrieved_contexts = [doc.page_content for doc in docs]
         if workflow == "graph":
-            result = RunGraph(query, compute_location, search_type)
+            result = RunGraph(query, compute_mode, search_type)
             retrieved_contexts = []
             if "retrieved_emails" in result:
                 # Remove the source files (e.g. R-help/2022-September.txt) as it confuses the evaluator
@@ -79,10 +79,10 @@ def main():
         description="Evaluate RAG retrieval and generation."
     )
     parser.add_argument(
-        "--compute_location",
+        "--compute_mode",
         choices=["cloud", "edge"],
         required=True,
-        help="Compute location: cloud or edge.",
+        help="Compute mode: cloud or edge.",
     )
     parser.add_argument(
         "--workflow",
@@ -97,13 +97,13 @@ def main():
         help="Search type: dense, sparse, or hybrid.",
     )
     args = parser.parse_args()
-    compute_location = args.compute_location
+    compute_mode = args.compute_mode
     workflow = args.workflow
     search_type = args.search_type
 
     queries, references = load_queries_and_references("eval.csv")
     dataset = build_eval_dataset(
-        queries, references, compute_location, workflow, search_type
+        queries, references, compute_mode, workflow, search_type
    )
     evaluation_dataset = EvaluationDataset.from_list(dataset)
 
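
With the renamed flag, the evaluation script would now be run along the lines of `python eval.py --compute_mode cloud --workflow graph --search_type hybrid` (assuming "graph" remains a valid --workflow choice). A minimal programmatic sketch of the same path, assuming eval.csv is present and cloud (OpenAI) credentials are configured:

from eval import load_queries_and_references, build_eval_dataset

queries, references = load_queries_and_references("eval.csv")
# compute_mode is now the third positional argument
dataset = build_eval_dataset(queries, references, "cloud", "graph", "hybrid")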
graph.py
CHANGED
@@ -105,7 +105,7 @@ def ToolifyHF(chat_model, system_message, system_message_suffix="", think=False)
 
 def BuildGraph(
     chat_model,
-    compute_location,
+    compute_mode,
     search_type,
     top_k=6,
     think_retrieve=False,
@@ -116,7 +116,7 @@ def BuildGraph(
 
     Args:
         chat_model: LangChain chat model from GetChatModel()
-        compute_location: cloud or edge (for retriever)
+        compute_mode: cloud or edge (for retriever)
         search_type: dense, sparse, or hybrid (for retriever)
         top_k: number of documents to retrieve
         think_retrieve: Whether to use thinking mode for retrieval
@@ -170,7 +170,7 @@ def BuildGraph(
             end_year: Ending year for emails (optional)
         """
         retriever = BuildRetriever(
-            compute_location, search_type, top_k, start_year, end_year
+            compute_mode, search_type, top_k, start_year, end_year
         )
         # For now, just add the months to the search query
         if months:
@@ -204,7 +204,7 @@ def BuildGraph(
     if is_edge:
         # For edge model (ChatHuggingFace)
         query_model = ToolifyHF(
-            chat_model, retrieve_prompt(compute_location), "", think_retrieve
+            chat_model, retrieve_prompt(compute_mode), "", think_retrieve
         ).bind_tools([retrieve_emails])
         generate_model = ToolifyHF(
             chat_model, answer_prompt(), "", think_generate
@@ -226,7 +226,7 @@ def BuildGraph(
             messages = normalize_messages(messages)
             print_messages_summary(messages, "--- query: after normalization ---")
         else:
-            messages = [SystemMessage(retrieve_prompt(compute_location))] + state[
+            messages = [SystemMessage(retrieve_prompt(compute_mode))] + state[
                 "messages"
             ]
         response = query_model.invoke(messages)
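
A minimal sketch of calling BuildGraph with the renamed second argument, mirroring how app.py and main.py compile the graph. It assumes cloud credentials are set; the MemorySaver import path is the usual langgraph location and may differ in this repo:

from langgraph.checkpoint.memory import MemorySaver
from main import GetChatModel
from graph import BuildGraph

chat_model = GetChatModel("cloud")
graph_builder = BuildGraph(chat_model, "cloud", "hybrid")  # compute_mode is the second argument
graph = graph_builder.compile(checkpointer=MemorySaver())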
index.py
CHANGED
@@ -9,14 +9,14 @@ from retriever import BuildRetriever, db_dir
 from mods.bm25s_retriever import BM25SRetriever
 
 
-def ProcessFile(file_path, search_type: str = "dense", compute_location: str = "cloud"):
+def ProcessFile(file_path, search_type: str = "dense", compute_mode: str = "cloud"):
     """
     Wrapper function to process file for dense or sparse search
 
     Args:
         file_path: File to process
         search_type: Type of search to use. Options: "dense", "sparse"
-        compute_location: Compute location for embeddings (cloud or edge)
+        compute_mode: Compute mode for embeddings (cloud or edge)
     """
 
     # Preprocess: remove quoted lines and handle email boundaries
@@ -67,7 +67,7 @@ def ProcessFile(file_path, search_type: str = "dense", compute_location: str = "cloud"):
             ProcessFileSparse(truncated_temp_file, file_path)
         elif search_type == "dense":
             # Handle dense search with ChromaDB
-            ProcessFileDense(truncated_temp_file, file_path, compute_location)
+            ProcessFileDense(truncated_temp_file, file_path, compute_mode)
         else:
             raise ValueError(f"Unsupported search type: {search_type}")
     finally:
@@ -79,12 +79,12 @@ def ProcessFile(file_path, search_type: str = "dense", compute_location: str = "cloud"):
         pass
 
 
-def ProcessFileDense(cleaned_temp_file, file_path, compute_location):
+def ProcessFileDense(cleaned_temp_file, file_path, compute_mode):
     """
     Process file for dense vector search using ChromaDB
     """
     # Get a retriever instance
-    retriever = BuildRetriever(compute_location, "dense")
+    retriever = BuildRetriever(compute_mode, "dense")
     # Load cleaned text file
     loader = TextLoader(cleaned_temp_file)
     documents = loader.load()
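
A minimal sketch of the renamed keyword on ProcessFile; the file path is just an example, and dense indexing in cloud mode assumes OpenAI embedding credentials:

from index import ProcessFile

ProcessFile("R-help/2022-September.txt", search_type="dense", compute_mode="cloud")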
main.py
CHANGED
@@ -46,13 +46,13 @@ httpx_logger = logging.getLogger("httpx")
 httpx_logger.setLevel(logging.WARNING)
 
 
-def ProcessDirectory(path, compute_location):
+def ProcessDirectory(path, compute_mode):
     """
     Update vector store and sparse index for files in a directory, only adding new or updated files
 
     Args:
         path: Directory to process
-        compute_location: Compute location for embeddings (cloud or edge)
+        compute_mode: Compute mode for embeddings (cloud or edge)
 
     Usage example:
     ProcessDirectory("R-help", "cloud")
@@ -62,14 +62,14 @@ def ProcessDirectory(path, compute_location):
     # https://stackoverflow.com/questions/76265631/chromadb-add-single-document-only-if-it-doesnt-exist
 
     # Get a dense retriever instance
-    retriever = BuildRetriever(compute_location, "dense")
+    retriever = BuildRetriever(compute_mode, "dense")
 
     # List all text files in target directory
     file_paths = glob.glob(f"{path}/*.txt")
     for file_path in file_paths:
 
         # Process file for sparse search (BM25S)
-        ProcessFile(file_path, "sparse", compute_location)
+        ProcessFile(file_path, "sparse", compute_mode)
 
         # Logic for dense search: skip file if already indexed
         # Look for existing embeddings for this file
@@ -99,7 +99,7 @@ def ProcessDirectory(path, compute_location):
                 update_file = True
 
         if add_file:
-            ProcessFile(file_path, "dense", compute_location)
+            ProcessFile(file_path, "dense", compute_mode)
 
         if update_file:
             print(f"Chroma: updated embeddings for {file_path}")
@@ -110,7 +110,7 @@ def ProcessDirectory(path, compute_location):
     ]
     files_to_keep = list(set(used_doc_ids))
     # Get all files in the file store
-    file_store = f"{db_dir}/file_store_{compute_location}"
+    file_store = f"{db_dir}/file_store_{compute_mode}"
     all_files = os.listdir(file_store)
     # Iterate through the files and delete those not in the list
     for file in all_files:
@@ -123,22 +123,22 @@ def ProcessDirectory(path, compute_location):
         print(f"Chroma: no change for {file_path}")
 
 
-def GetChatModel(compute_location):
+def GetChatModel(compute_mode):
     """
     Get a chat model.
 
     Args:
-        compute_location: Compute location for chat model (cloud or edge)
+        compute_mode: Compute mode for chat model (cloud or edge)
     """
 
-    if compute_location == "cloud":
+    if compute_mode == "cloud":
 
         chat_model = ChatOpenAI(model=openai_model, temperature=0)
 
-    if compute_location == "edge":
+    if compute_mode == "edge":
 
         # Don't try to use edge models without a GPU
-        if compute_location == "edge" and not torch.cuda.is_available():
+        if compute_mode == "edge" and not torch.cuda.is_available():
             raise Exception("Edge chat model selected without GPU")
 
         # Define the pipeline to pass to the HuggingFacePipeline class
@@ -169,7 +169,7 @@ def GetChatModel(compute_location):
 
 def RunChain(
     query,
-    compute_location: str = "cloud",
+    compute_mode: str = "cloud",
     search_type: str = "hybrid",
     think: bool = False,
 ):
@@ -178,7 +178,7 @@ def RunChain(
 
     Args:
         query: User's query
-        compute_location: Compute location for embedding and chat models (cloud or edge)
+        compute_mode: Compute mode for embedding and chat models (cloud or edge)
         search_type: Type of search to use. Options: "dense", "sparse", or "hybrid"
         think: Control thinking mode for SmolLM3
 
@@ -187,13 +187,13 @@ def RunChain(
     """
 
     # Get retriever instance
-    retriever = BuildRetriever(compute_location, search_type)
+    retriever = BuildRetriever(compute_mode, search_type)
 
     if retriever is None:
         return "No retriever available. Please process some documents first."
 
     # Get chat model (LLM)
-    chat_model = GetChatModel(compute_location)
+    chat_model = GetChatModel(compute_mode)
 
     # Control thinking for SmolLM3
     system_prompt = answer_prompt()
@@ -230,7 +230,7 @@ def RunChain(
 
 def RunGraph(
     query: str,
-    compute_location: str = "cloud",
+    compute_mode: str = "cloud",
     search_type: str = "hybrid",
     top_k: int = 6,
     think_retrieve=False,
@@ -241,7 +241,7 @@ def RunGraph(
 
     Args:
         query: User query to start the chat
-        compute_location: Compute location for embedding and chat models (cloud or edge)
+        compute_mode: Compute mode for embedding and chat models (cloud or edge)
         search_type: Type of search to use. Options: "dense", "sparse", or "hybrid"
         top_k: Number of documents to retrieve
         think_retrieve: Whether to use thinking mode for retrieval (tool-calling)
@@ -253,11 +253,11 @@ def RunGraph(
     """
 
     # Get chat model used in both query and generate steps
-    chat_model = GetChatModel(compute_location)
+    chat_model = GetChatModel(compute_mode)
     # Build the graph
     graph_builder = BuildGraph(
         chat_model,
-        compute_location,
+        compute_mode,
         search_type,
         top_k,
         think_retrieve,
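
A minimal usage sketch with the renamed parameter, following the ProcessDirectory docstring above; the query string is made up, and cloud mode assumes OpenAI credentials:

from main import ProcessDirectory, RunChain, RunGraph

ProcessDirectory("R-help", "cloud")  # compute_mode is the second positional argument
answer = RunChain("How can I merge two data frames?", compute_mode="cloud", search_type="hybrid")
result = RunGraph("How can I merge two data frames?", compute_mode="cloud")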
prompts.py
CHANGED
@@ -3,11 +3,11 @@ from util import get_sources, get_start_end_months
 import re
 
 
-def retrieve_prompt(compute_location):
+def retrieve_prompt(compute_mode):
     """Return system prompt for query step
 
     Args:
-        compute_location: Compute location for embedding model (cloud or edge)
+        compute_mode: Compute mode for embedding model (cloud or edge)
     """
 
     # Get start and end months from database
retriever.py
CHANGED
@@ -29,7 +29,7 @@ db_dir = "db"
 
 
 def BuildRetriever(
-    compute_location,
+    compute_mode,
     search_type: str = "hybrid",
     top_k=6,
     start_year=None,
@@ -40,7 +40,7 @@ def BuildRetriever(
     All retriever types are configured to return up to 6 documents for fair comparison in evals.
 
     Args:
-        compute_location: Compute location for embeddings (cloud or edge)
+        compute_mode: Compute mode for embeddings (cloud or edge)
         search_type: Type of search to use. Options: "dense", "sparse", "hybrid"
         top_k: Number of documents to retrieve for "dense" and "sparse"
         start_year: Start year (optional)
@@ -49,10 +49,10 @@ def BuildRetriever(
     if search_type == "dense":
         if not (start_year or end_year):
             # No year filtering, so directly use base retriever
-            return BuildRetrieverDense(compute_location, top_k=top_k)
+            return BuildRetrieverDense(compute_mode, top_k=top_k)
         else:
             # Get 1000 documents then keep top_k filtered by year
-            base_retriever = BuildRetrieverDense(compute_location, top_k=1000)
+            base_retriever = BuildRetrieverDense(compute_mode, top_k=1000)
             return TopKRetriever(
                 base_retriever=base_retriever,
                 top_k=top_k,
@@ -76,10 +76,10 @@ def BuildRetriever(
         # Use floor (top_k // 2) and ceiling -(top_k // -2) to divide odd values of top_k
         # https://stackoverflow.com/questions/14822184/is-there-a-ceiling-equivalent-of-operator-in-python
         dense_retriever = BuildRetriever(
-            compute_location, "dense", (top_k // 2), start_year, end_year
+            compute_mode, "dense", (top_k // 2), start_year, end_year
         )
         sparse_retriever = BuildRetriever(
-            compute_location, "sparse", -(top_k // -2), start_year, end_year
+            compute_mode, "sparse", -(top_k // -2), start_year, end_year
         )
         ensemble_retriever = EnsembleRetriever(
             retrievers=[dense_retriever, sparse_retriever], weights=[1, 1]
@@ -109,23 +109,23 @@ def BuildRetrieverSparse(top_k=6):
     return retriever
 
 
-def BuildRetrieverDense(compute_location: str, top_k=6):
+def BuildRetrieverDense(compute_mode: str, top_k=6):
     """
     Build dense retriever instance with ChromaDB vectorstore
 
     Args:
-        compute_location: Compute location for embeddings (cloud or edge)
+        compute_mode: Compute mode for embeddings (cloud or edge)
         top_k: Number of documents to retrieve
     """
 
     # Don't try to use edge models without a GPU
-    if compute_location == "edge" and not torch.cuda.is_available():
+    if compute_mode == "edge" and not torch.cuda.is_available():
         raise Exception("Edge embeddings selected without GPU")
 
     # Define embedding model
-    if compute_location == "cloud":
+    if compute_mode == "cloud":
         embedding_function = OpenAIEmbeddings(model="text-embedding-3-small")
-    if compute_location == "edge":
+    if compute_mode == "edge":
         # embedding_function = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5", show_progress=True)
         # https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.huggingface.HuggingFaceBgeEmbeddings.html
         model_name = "nomic-ai/nomic-embed-text-v1.5"
@@ -143,7 +143,7 @@ def BuildRetrieverDense(compute_location: str, top_k=6):
     )
     # Create vector store
     client_settings = chromadb.config.Settings(anonymized_telemetry=False)
-    persist_directory = f"{db_dir}/chroma_{compute_location}"
+    persist_directory = f"{db_dir}/chroma_{compute_mode}"
     vectorstore = Chroma(
         collection_name="R-help",
         embedding_function=embedding_function,
@@ -151,7 +151,7 @@ def BuildRetrieverDense(compute_location: str, top_k=6):
         persist_directory=persist_directory,
     )
     # The storage layer for the parent documents
-    file_store = f"{db_dir}/file_store_{compute_location}"
+    file_store = f"{db_dir}/file_store_{compute_mode}"
     byte_store = LocalFileStore(file_store)
     # Text splitter for child documents
     child_splitter = RecursiveCharacterTextSplitter(
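
A minimal sketch of building a retriever with the renamed first argument; the query is made up, and hybrid search needs both the Chroma store and the BM25S index to exist under db/:

from retriever import BuildRetriever

retriever = BuildRetriever("cloud", search_type="hybrid", top_k=6)
docs = retriever.invoke("reading fixed-width files")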
util.py
CHANGED
@@ -5,7 +5,7 @@ import os
 import re
 
 
-def get_collection(compute_location):
+def get_collection(compute_mode):
     """
     Returns the vectorstore collection.
 
@@ -16,7 +16,7 @@ def get_collection(compute_location):
     # Number of parent documents (unique doc_ids)
     len(set([m["doc_id"] for m in collection["metadatas"]]))
     """
-    retriever = BuildRetriever(compute_location, "dense")
+    retriever = BuildRetriever(compute_mode, "dense")
     return retriever.vectorstore.get()
 
 
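
A minimal sketch using the renamed argument, following the docstring above; cloud mode assumes the chroma_cloud store already exists under db/:

from util import get_collection

collection = get_collection("cloud")
# Number of parent documents (unique doc_ids)
print(len(set(m["doc_id"] for m in collection["metadatas"])))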