Spaces: Running on Zero

Commit 100d2c7 · jedick committed · 1 Parent(s): 03db0de

Rename compute_location to compute_mode

Changed files:
- app.py +26 -26
- eval.py +8 -8
- graph.py +5 -5
- index.py +5 -5
- main.py +19 -19
- prompts.py +2 -2
- retriever.py +13 -13
- util.py +2 -2
app.py
CHANGED
@@ -14,7 +14,7 @@ import uuid
 import ast
 import os
 
-# Global settings for compute_location and search_type
+# Global settings for compute_mode and search_type
 COMPUTE = "cloud"
 search_type = "hybrid"
 
@@ -31,7 +31,7 @@ graph_cloud = None
 def run_workflow(input, history, thread_id):
     """The main function to run the chat workflow"""
 
-    # Get global graph for compute location
+    # Get global graph for compute mode
     global graph_edge, graph_cloud
     if COMPUTE == "edge":
         graph = graph_edge
@@ -52,7 +52,7 @@ def run_workflow(input, history, thread_id):
     # Compile the graph with an in-memory checkpointer
     memory = MemorySaver()
     graph = graph_builder.compile(checkpointer=memory)
-    # Set global graph for compute location
+    # Set global graph for compute mode
     if COMPUTE == "edge":
         graph_edge = graph
     if COMPUTE == "cloud":
@@ -213,14 +213,14 @@ with gr.Blocks(
     # Define components
     # -----------------
 
-    compute_location = gr.Radio(
+    compute_mode = gr.Radio(
         choices=[
             "cloud",
            "edge" if torch.cuda.is_available() else "edge (not available)",
         ],
         value=COMPUTE,
-        label="Compute Location",
-        info=(None if torch.cuda.is_available() else "NOTE: edge
+        label="Compute Mode",
+        info=(None if torch.cuda.is_available() else "NOTE: edge mode requires GPU"),
         interactive=torch.cuda.is_available(),
         render=False,
     )
@@ -256,7 +256,7 @@ with gr.Blocks(
             None,
             (
                 "images/cloud.png"
-                if compute_location.value == "cloud"
+                if compute_mode.value == "cloud"
                 else "images/chip.png"
             ),
         ),
@@ -293,22 +293,22 @@ with gr.Blocks(
     **Chat with the [R-help mailing list archives]((https://stat.ethz.ch/pipermail/r-help/)).**
     An LLM turns your question into a search query, including year ranges, and generates an answer from the retrieved emails.
     You can ask follow-up questions with the chat history as context.
-    ➡️ To clear the history and start a new chat, press the 🗑️ trash button
-    **_Answers may be incorrect._
+    ➡️ To clear the history and start a new chat, press the 🗑️ trash button.
+    **_Answers may be incorrect._**
     """
         return intro
 
-    def get_status_text(compute_location):
-        if compute_location.startswith("cloud"):
+    def get_status_text(compute_mode):
+        if compute_mode.startswith("cloud"):
            status_text = f"""
-            📍
+            📍 Now in **cloud** mode, using the OpenAI API<br>
             ✨ text-embedding-3-small and {openai_model}<br>
             ⚠️ **_Privacy Notice_**: Data sharing with OpenAI is enabled<br>
             🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
             """
-        if compute_location.startswith("edge"):
+        if compute_mode.startswith("edge"):
            status_text = f"""
-            📍
+            📍 Now in **edge** mode, using ZeroGPU hardware<br>
             ✨ Embeddings: [Nomic](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5); LLM: [{model_id}](https://huggingface.co/{model_id})<br>
             🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
             """
@@ -327,7 +327,7 @@ with gr.Blocks(
        info_text = f"""
     **Database:** {len(sources)} emails from {start} to {end}.
     **Features:** RAG, today's date, hybrid search (dense+sparse), query analysis,
-    multiple tool calls (cloud
+    multiple tool calls (cloud mode), answer with citations (cloud mode), chat memory.
     **Tech:** LangChain + Hugging Face + Gradio; ChromaDB and [BM25S](https://github.com/xhluca/bm25s)-based retrievers.<br>
     """
         return info_text
@@ -339,7 +339,7 @@ with gr.Blocks(
         with gr.Column(scale=2):
             intro = gr.Markdown(get_intro_text())
         with gr.Column(scale=1):
-            compute_location.render()
+            compute_mode.render()
             chat_interface = gr.ChatInterface(
                 to_workflow,
                 chatbot=chatbot,
@@ -359,7 +359,7 @@ with gr.Blocks(
             )
         # Right column: Info, Examples, Citations
         with gr.Column(scale=1):
-            status = gr.Markdown(get_status_text(compute_location.value))
+            status = gr.Markdown(get_status_text(compute_mode.value))
             with gr.Accordion("ℹ️ More Info", open=False):
                 info = gr.Markdown(get_info_text())
             with gr.Accordion("💡 Examples", open=True):
@@ -408,14 +408,14 @@ with gr.Blocks(
         """Return updated value for a component"""
         return gr.update(value=value)
 
-    def set_compute(compute_location):
+    def set_compute(compute_mode):
         global COMPUTE
-        COMPUTE = compute_location
+        COMPUTE = compute_mode
 
-    def set_avatar(compute_location):
-        if compute_location.startswith("cloud"):
+    def set_avatar(compute_mode):
+        if compute_mode.startswith("cloud"):
             image_file = "images/cloud.png"
-        if compute_location.startswith("edge"):
+        if compute_mode.startswith("edge"):
             image_file = "images/chip.png"
         return gr.update(
             avatar_images=(
@@ -458,15 +458,15 @@ with gr.Blocks(
         """Return cleared component"""
         return component.clear()
 
-    compute_location.change(
+    compute_mode.change(
         # Update global COMPUTE variable
         set_compute,
-        [compute_location],
+        [compute_mode],
         api_name=False,
     ).then(
         # Change the app status text
         get_status_text,
-        [compute_location],
+        [compute_mode],
         [status],
         api_name=False,
     ).then(
@@ -478,7 +478,7 @@ with gr.Blocks(
     ).then(
         # Change the chatbot avatar
         set_avatar,
-        [compute_location],
+        [compute_mode],
         [chatbot],
         api_name=False,
     ).then(
eval.py
CHANGED
@@ -34,7 +34,7 @@ def load_queries_and_references(csv_path):
     return queries, references
 
 
-def build_eval_dataset(queries, references, compute_location, workflow, search_type):
+def build_eval_dataset(queries, references, compute_mode, workflow, search_type):
     """Build dataset for evaluation"""
     dataset = []
     for query, reference in zip(queries, references):
@@ -42,15 +42,15 @@ def build_eval_dataset(queries, references, compute_location, workflow, search_type):
         if workflow == "chain":
             print("\n\n--- Query ---")
             print(query)
-            response = RunChain(query, compute_location, search_type)
+            response = RunChain(query, compute_mode, search_type)
             print("--- Response ---")
             print(response)
             # Retrieve context documents for a query
-            retriever = BuildRetriever(compute_location, search_type)
+            retriever = BuildRetriever(compute_mode, search_type)
             docs = retriever.invoke(query)
             retrieved_contexts = [doc.page_content for doc in docs]
         if workflow == "graph":
-            result = RunGraph(query, compute_location, search_type)
+            result = RunGraph(query, compute_mode, search_type)
             retrieved_contexts = []
             if "retrieved_emails" in result:
                 # Remove the source files (e.g. R-help/2022-September.txt) as it confuses the evaluator
@@ -79,10 +79,10 @@ def main():
         description="Evaluate RAG retrieval and generation."
     )
     parser.add_argument(
-        "--compute_location",
+        "--compute_mode",
         choices=["cloud", "edge"],
         required=True,
-        help="Compute location: cloud or edge.",
+        help="Compute mode: cloud or edge.",
     )
     parser.add_argument(
         "--workflow",
@@ -97,13 +97,13 @@ def main():
         help="Search type: dense, sparse, or hybrid.",
     )
     args = parser.parse_args()
-    compute_location = args.compute_location
+    compute_mode = args.compute_mode
     workflow = args.workflow
     search_type = args.search_type
 
     queries, references = load_queries_and_references("eval.csv")
     dataset = build_eval_dataset(
-        queries, references, compute_location, workflow, search_type
+        queries, references, compute_mode, workflow, search_type
    )
     evaluation_dataset = EvaluationDataset.from_list(dataset)
 
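
With the renamed flag, the evaluation script would now be run along the lines of `python eval.py --compute_mode cloud --workflow graph --search_type hybrid` (assuming "graph" remains a valid --workflow choice). A minimal programmatic sketch of the same path, assuming eval.csv is present and cloud (OpenAI) credentials are configured:

from eval import load_queries_and_references, build_eval_dataset

queries, references = load_queries_and_references("eval.csv")
# compute_mode is now the third positional argument
dataset = build_eval_dataset(queries, references, "cloud", "graph", "hybrid")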
graph.py
CHANGED
@@ -105,7 +105,7 @@ def ToolifyHF(chat_model, system_message, system_message_suffix="", think=False)
 
 def BuildGraph(
     chat_model,
-    compute_location,
+    compute_mode,
     search_type,
     top_k=6,
     think_retrieve=False,
@@ -116,7 +116,7 @@ def BuildGraph(
 
     Args:
         chat_model: LangChain chat model from GetChatModel()
-        compute_location: cloud or edge (for retriever)
+        compute_mode: cloud or edge (for retriever)
         search_type: dense, sparse, or hybrid (for retriever)
         top_k: number of documents to retrieve
         think_retrieve: Whether to use thinking mode for retrieval
@@ -170,7 +170,7 @@ def BuildGraph(
             end_year: Ending year for emails (optional)
         """
         retriever = BuildRetriever(
-            compute_location, search_type, top_k, start_year, end_year
+            compute_mode, search_type, top_k, start_year, end_year
         )
         # For now, just add the months to the search query
         if months:
@@ -204,7 +204,7 @@ def BuildGraph(
     if is_edge:
         # For edge model (ChatHuggingFace)
         query_model = ToolifyHF(
-            chat_model, retrieve_prompt(compute_location), "", think_retrieve
+            chat_model, retrieve_prompt(compute_mode), "", think_retrieve
         ).bind_tools([retrieve_emails])
         generate_model = ToolifyHF(
             chat_model, answer_prompt(), "", think_generate
@@ -226,7 +226,7 @@ def BuildGraph(
             messages = normalize_messages(messages)
             print_messages_summary(messages, "--- query: after normalization ---")
         else:
-            messages = [SystemMessage(retrieve_prompt(compute_location))] + state[
+            messages = [SystemMessage(retrieve_prompt(compute_mode))] + state[
                 "messages"
             ]
         response = query_model.invoke(messages)
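
A minimal sketch of calling BuildGraph with the renamed second argument, mirroring how app.py and main.py compile the graph. It assumes cloud credentials are set; the MemorySaver import path is the usual langgraph location and may differ in this repo:

from langgraph.checkpoint.memory import MemorySaver
from main import GetChatModel
from graph import BuildGraph

chat_model = GetChatModel("cloud")
graph_builder = BuildGraph(chat_model, "cloud", "hybrid")  # compute_mode is the second argument
graph = graph_builder.compile(checkpointer=MemorySaver())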
index.py
CHANGED
@@ -9,14 +9,14 @@ from retriever import BuildRetriever, db_dir
 from mods.bm25s_retriever import BM25SRetriever
 
 
-def ProcessFile(file_path, search_type: str = "dense", compute_location: str = "cloud"):
+def ProcessFile(file_path, search_type: str = "dense", compute_mode: str = "cloud"):
     """
     Wrapper function to process file for dense or sparse search
 
     Args:
         file_path: File to process
         search_type: Type of search to use. Options: "dense", "sparse"
-        compute_location: Compute location for embeddings (cloud or edge)
+        compute_mode: Compute mode for embeddings (cloud or edge)
     """
 
     # Preprocess: remove quoted lines and handle email boundaries
@@ -67,7 +67,7 @@ def ProcessFile(file_path, search_type: str = "dense", compute_location: str = "cloud"):
             ProcessFileSparse(truncated_temp_file, file_path)
         elif search_type == "dense":
             # Handle dense search with ChromaDB
-            ProcessFileDense(truncated_temp_file, file_path, compute_location)
+            ProcessFileDense(truncated_temp_file, file_path, compute_mode)
         else:
             raise ValueError(f"Unsupported search type: {search_type}")
     finally:
@@ -79,12 +79,12 @@ def ProcessFile(file_path, search_type: str = "dense", compute_location: str = "cloud"):
         pass
 
 
-def ProcessFileDense(cleaned_temp_file, file_path, compute_location):
+def ProcessFileDense(cleaned_temp_file, file_path, compute_mode):
     """
     Process file for dense vector search using ChromaDB
     """
     # Get a retriever instance
-    retriever = BuildRetriever(compute_location, "dense")
+    retriever = BuildRetriever(compute_mode, "dense")
     # Load cleaned text file
     loader = TextLoader(cleaned_temp_file)
     documents = loader.load()
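
A minimal sketch of the renamed keyword on ProcessFile; the file path is just an example, and dense indexing in cloud mode assumes OpenAI embedding credentials:

from index import ProcessFile

ProcessFile("R-help/2022-September.txt", search_type="dense", compute_mode="cloud")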
main.py
CHANGED
@@ -46,13 +46,13 @@ httpx_logger = logging.getLogger("httpx")
 httpx_logger.setLevel(logging.WARNING)
 
 
-def ProcessDirectory(path, compute_location):
+def ProcessDirectory(path, compute_mode):
     """
     Update vector store and sparse index for files in a directory, only adding new or updated files
 
     Args:
         path: Directory to process
-        compute_location: Compute location for embeddings (cloud or edge)
+        compute_mode: Compute mode for embeddings (cloud or edge)
 
     Usage example:
     ProcessDirectory("R-help", "cloud")
@@ -62,14 +62,14 @@ def ProcessDirectory(path, compute_location):
     # https://stackoverflow.com/questions/76265631/chromadb-add-single-document-only-if-it-doesnt-exist
 
     # Get a dense retriever instance
-    retriever = BuildRetriever(compute_location, "dense")
+    retriever = BuildRetriever(compute_mode, "dense")
 
     # List all text files in target directory
     file_paths = glob.glob(f"{path}/*.txt")
     for file_path in file_paths:
 
         # Process file for sparse search (BM25S)
-        ProcessFile(file_path, "sparse", compute_location)
+        ProcessFile(file_path, "sparse", compute_mode)
 
         # Logic for dense search: skip file if already indexed
         # Look for existing embeddings for this file
@@ -99,7 +99,7 @@ def ProcessDirectory(path, compute_location):
                 update_file = True
 
         if add_file:
-            ProcessFile(file_path, "dense", compute_location)
+            ProcessFile(file_path, "dense", compute_mode)
 
         if update_file:
             print(f"Chroma: updated embeddings for {file_path}")
@@ -110,7 +110,7 @@ def ProcessDirectory(path, compute_location):
     ]
     files_to_keep = list(set(used_doc_ids))
     # Get all files in the file store
-    file_store = f"{db_dir}/file_store_{compute_location}"
+    file_store = f"{db_dir}/file_store_{compute_mode}"
     all_files = os.listdir(file_store)
     # Iterate through the files and delete those not in the list
     for file in all_files:
@@ -123,22 +123,22 @@ def ProcessDirectory(path, compute_location):
         print(f"Chroma: no change for {file_path}")
 
 
-def GetChatModel(compute_location):
+def GetChatModel(compute_mode):
     """
     Get a chat model.
 
     Args:
-        compute_location: Compute location for chat model (cloud or edge)
+        compute_mode: Compute mode for chat model (cloud or edge)
     """
 
-    if compute_location == "cloud":
+    if compute_mode == "cloud":
 
         chat_model = ChatOpenAI(model=openai_model, temperature=0)
 
-    if compute_location == "edge":
+    if compute_mode == "edge":
 
         # Don't try to use edge models without a GPU
-        if compute_location == "edge" and not torch.cuda.is_available():
+        if compute_mode == "edge" and not torch.cuda.is_available():
             raise Exception("Edge chat model selected without GPU")
 
         # Define the pipeline to pass to the HuggingFacePipeline class
@@ -169,7 +169,7 @@ def GetChatModel(compute_location):
 
 def RunChain(
     query,
-    compute_location: str = "cloud",
+    compute_mode: str = "cloud",
     search_type: str = "hybrid",
     think: bool = False,
 ):
@@ -178,7 +178,7 @@ def RunChain(
 
     Args:
         query: User's query
-        compute_location: Compute location for embedding and chat models (cloud or edge)
+        compute_mode: Compute mode for embedding and chat models (cloud or edge)
         search_type: Type of search to use. Options: "dense", "sparse", or "hybrid"
         think: Control thinking mode for SmolLM3
 
@@ -187,13 +187,13 @@ def RunChain(
     """
 
     # Get retriever instance
-    retriever = BuildRetriever(compute_location, search_type)
+    retriever = BuildRetriever(compute_mode, search_type)
 
     if retriever is None:
         return "No retriever available. Please process some documents first."
 
     # Get chat model (LLM)
-    chat_model = GetChatModel(compute_location)
+    chat_model = GetChatModel(compute_mode)
 
     # Control thinking for SmolLM3
     system_prompt = answer_prompt()
@@ -230,7 +230,7 @@ def RunChain(
 
 def RunGraph(
     query: str,
-    compute_location: str = "cloud",
+    compute_mode: str = "cloud",
     search_type: str = "hybrid",
     top_k: int = 6,
     think_retrieve=False,
@@ -241,7 +241,7 @@ def RunGraph(
 
     Args:
         query: User query to start the chat
-        compute_location: Compute location for embedding and chat models (cloud or edge)
+        compute_mode: Compute mode for embedding and chat models (cloud or edge)
         search_type: Type of search to use. Options: "dense", "sparse", or "hybrid"
         top_k: Number of documents to retrieve
         think_retrieve: Whether to use thinking mode for retrieval (tool-calling)
@@ -253,11 +253,11 @@ def RunGraph(
     """
 
     # Get chat model used in both query and generate steps
-    chat_model = GetChatModel(compute_location)
+    chat_model = GetChatModel(compute_mode)
     # Build the graph
     graph_builder = BuildGraph(
         chat_model,
-        compute_location,
+        compute_mode,
         search_type,
         top_k,
         think_retrieve,
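
A minimal usage sketch with the renamed parameter, following the ProcessDirectory docstring above; the query string is made up, and cloud mode assumes OpenAI credentials:

from main import ProcessDirectory, RunChain, RunGraph

ProcessDirectory("R-help", "cloud")  # compute_mode is the second positional argument
answer = RunChain("How can I merge two data frames?", compute_mode="cloud", search_type="hybrid")
result = RunGraph("How can I merge two data frames?", compute_mode="cloud")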
prompts.py
CHANGED
@@ -3,11 +3,11 @@ from util import get_sources, get_start_end_months
 import re
 
 
-def retrieve_prompt(compute_location):
+def retrieve_prompt(compute_mode):
     """Return system prompt for query step
 
     Args:
-        compute_location: Compute location for embedding model (cloud or edge)
+        compute_mode: Compute mode for embedding model (cloud or edge)
     """
 
     # Get start and end months from database
retriever.py
CHANGED
@@ -29,7 +29,7 @@ db_dir = "db"
 
 
 def BuildRetriever(
-    compute_location,
+    compute_mode,
     search_type: str = "hybrid",
     top_k=6,
     start_year=None,
@@ -40,7 +40,7 @@ def BuildRetriever(
     All retriever types are configured to return up to 6 documents for fair comparison in evals.
 
     Args:
-        compute_location: Compute location for embeddings (cloud or edge)
+        compute_mode: Compute mode for embeddings (cloud or edge)
         search_type: Type of search to use. Options: "dense", "sparse", "hybrid"
         top_k: Number of documents to retrieve for "dense" and "sparse"
         start_year: Start year (optional)
@@ -49,10 +49,10 @@ def BuildRetriever(
     if search_type == "dense":
         if not (start_year or end_year):
             # No year filtering, so directly use base retriever
-            return BuildRetrieverDense(compute_location, top_k=top_k)
+            return BuildRetrieverDense(compute_mode, top_k=top_k)
         else:
             # Get 1000 documents then keep top_k filtered by year
-            base_retriever = BuildRetrieverDense(compute_location, top_k=1000)
+            base_retriever = BuildRetrieverDense(compute_mode, top_k=1000)
             return TopKRetriever(
                 base_retriever=base_retriever,
                 top_k=top_k,
@@ -76,10 +76,10 @@ def BuildRetriever(
         # Use floor (top_k // 2) and ceiling -(top_k // -2) to divide odd values of top_k
         # https://stackoverflow.com/questions/14822184/is-there-a-ceiling-equivalent-of-operator-in-python
         dense_retriever = BuildRetriever(
-            compute_location, "dense", (top_k // 2), start_year, end_year
+            compute_mode, "dense", (top_k // 2), start_year, end_year
         )
         sparse_retriever = BuildRetriever(
-            compute_location, "sparse", -(top_k // -2), start_year, end_year
+            compute_mode, "sparse", -(top_k // -2), start_year, end_year
         )
         ensemble_retriever = EnsembleRetriever(
             retrievers=[dense_retriever, sparse_retriever], weights=[1, 1]
@@ -109,23 +109,23 @@ def BuildRetrieverSparse(top_k=6):
     return retriever
 
 
-def BuildRetrieverDense(compute_location: str, top_k=6):
+def BuildRetrieverDense(compute_mode: str, top_k=6):
     """
     Build dense retriever instance with ChromaDB vectorstore
 
     Args:
-        compute_location: Compute location for embeddings (cloud or edge)
+        compute_mode: Compute mode for embeddings (cloud or edge)
         top_k: Number of documents to retrieve
     """
 
     # Don't try to use edge models without a GPU
-    if compute_location == "edge" and not torch.cuda.is_available():
+    if compute_mode == "edge" and not torch.cuda.is_available():
         raise Exception("Edge embeddings selected without GPU")
 
     # Define embedding model
-    if compute_location == "cloud":
+    if compute_mode == "cloud":
         embedding_function = OpenAIEmbeddings(model="text-embedding-3-small")
-    if compute_location == "edge":
+    if compute_mode == "edge":
         # embedding_function = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5", show_progress=True)
         # https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.huggingface.HuggingFaceBgeEmbeddings.html
         model_name = "nomic-ai/nomic-embed-text-v1.5"
@@ -143,7 +143,7 @@ def BuildRetrieverDense(compute_location: str, top_k=6):
     )
     # Create vector store
     client_settings = chromadb.config.Settings(anonymized_telemetry=False)
-    persist_directory = f"{db_dir}/chroma_{compute_location}"
+    persist_directory = f"{db_dir}/chroma_{compute_mode}"
     vectorstore = Chroma(
         collection_name="R-help",
         embedding_function=embedding_function,
@@ -151,7 +151,7 @@ def BuildRetrieverDense(compute_location: str, top_k=6):
         persist_directory=persist_directory,
     )
     # The storage layer for the parent documents
-    file_store = f"{db_dir}/file_store_{compute_location}"
+    file_store = f"{db_dir}/file_store_{compute_mode}"
     byte_store = LocalFileStore(file_store)
     # Text splitter for child documents
     child_splitter = RecursiveCharacterTextSplitter(
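
A minimal sketch of building a retriever with the renamed first argument; the query is made up, and hybrid search needs both the Chroma store and the BM25S index to exist under db/:

from retriever import BuildRetriever

retriever = BuildRetriever("cloud", search_type="hybrid", top_k=6)
docs = retriever.invoke("reading fixed-width files")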
util.py
CHANGED
@@ -5,7 +5,7 @@ import os
 import re
 
 
-def get_collection(compute_location):
+def get_collection(compute_mode):
     """
     Returns the vectorstore collection.
 
@@ -16,7 +16,7 @@ def get_collection(compute_location):
     # Number of parent documents (unique doc_ids)
     len(set([m["doc_id"] for m in collection["metadatas"]]))
     """
-    retriever = BuildRetriever(compute_location, "dense")
+    retriever = BuildRetriever(compute_mode, "dense")
     return retriever.vectorstore.get()
 
 
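
A minimal sketch using the renamed argument, following the docstring above; cloud mode assumes the chroma_cloud store already exists under db/:

from util import get_collection

collection = get_collection("cloud")
# Number of parent documents (unique doc_ids)
print(len(set(m["doc_id"] for m in collection["metadatas"])))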