MrAlexGov committed on
Commit 3fe28a6 · verified · 1 Parent(s): 330b1d3

Update app.py

Files changed (1)
  1. app.py +24 -19
app.py CHANGED
@@ -1,13 +1,11 @@
 import gradio as gr
 from transformers import pipeline, AutoTokenizer
-from typing import List, Tuple, Dict, Any
+from typing import List, Dict, Any, Tuple
 import torch
 
-# CPU models
+# CPU models (only one small model, to save memory)
 MODELS = {
-    "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B-Instruct",
-    "Qwen2.5-1.5B": "Qwen/Qwen2.5-1.5B-Instruct",
-    "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct"
+    "gpt2": "gpt2",  # use only GPT-2 to save memory
 }
 
 def load_model(model_key: str):
@@ -21,12 +19,15 @@ def load_model(model_key: str):
         "text-generation",
         model=model_id,
         tokenizer=tokenizer,
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-        device_map="auto" if torch.cuda.is_available() else None,
-        max_new_tokens=512,
+        torch_dtype=torch.float32,  # use float32 on CPU
+        device_map=None,  # explicitly run on CPU
+        max_new_tokens=128,  # even fewer tokens to save memory
         do_sample=True,
         temperature=0.7,
-        pad_token_id=tokenizer.eos_token_id
+        pad_token_id=tokenizer.eos_token_id,
+        # memory optimization parameters
+        low_cpu_mem_usage=True,
+        trust_remote_code=True
     )
     print(f"✅ {model_id} загружена!")
     return pipe
@@ -34,9 +35,9 @@ def load_model(model_key: str):
 model_cache = {}
 
 def respond(message: str,
-            history: List[Tuple[str, str]],
+            history: List[Dict[str, str]],
             model_key: str,
-            system_prompt: str) -> Tuple[List[Tuple[str, str]], str, Dict[str, Any]]:
+            system_prompt: str) -> Tuple[List[Dict[str, str]], str, Dict[str, Any]]:
     try:
         if model_key not in model_cache:
             model_cache[model_key] = load_model(model_key)
@@ -48,9 +49,8 @@ def respond(message: str,
         if system_prompt.strip():
             messages.append({"role": "system", "content": system_prompt})
 
-        for user_msg, bot_reply in history:
-            messages.append({"role": "user", "content": user_msg})
-            messages.append({"role": "assistant", "content": bot_reply})
+        for msg in history:
+            messages.append({"role": msg["role"], "content": msg["content"]})
 
         messages.append({"role": "user", "content": message})
 
@@ -62,20 +62,20 @@ def respond(message: str,
 
         print(f"✅ Ответ: {bot_reply[:50]}...")
 
-        new_history = history + [(message, bot_reply)]
+        new_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": bot_reply}]
         return new_history, "", gr.update(value="")
 
     except Exception as e:
         error_msg = f"❌ {model_key}: {str(e)}"
         print(f"💥 {error_msg}")
-        new_history = history + [(message, error_msg)]
+        new_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": error_msg}]
         return new_history, error_msg, gr.update(value="")
 
 with gr.Blocks(title="🚀 Локальный HF Чат (на слабом CPU!)") as demo:
     gr.Markdown("# Локальный Inference (без API!)\n**Маленькие модели** — 1-3 сек CPU. Большие думают ооочень долго. Нет limits/token. В качестве примера.")
 
     with gr.Row():
-        model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-0.5B", label="🧠 Модель")
+        model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="gpt2", label="🧠 Модель")
         system_prompt = gr.Textbox(label="📝 System", placeholder="Ты весёлый ИИ.", lines=2)
 
     chatbot = gr.Chatbot(height=500, label="Чат")  # ← no type argument
@@ -97,9 +97,14 @@ with gr.Blocks(title="🚀 Локальный HF Чат (на слабом CPU!)
         return [], "", gr.update(value="")
     clear_btn.click(clear, outputs=[chatbot, status, msg_input])
 
-    def retry(history: List[Tuple[str, str]]):
+    def retry(history: List[Dict[str, str]]):
         if history:
-            return history[-1][0]
+            last_user_msg = None
+            for msg in reversed(history):
+                if msg["role"] == "user":
+                    last_user_msg = msg["content"]
+                    break
+            return last_user_msg if last_user_msg else ""
         return ""
     retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])
110