# chat-bots-test / app.py
import gradio as gr
from transformers import pipeline, AutoTokenizer
from typing import List, Dict, Any, Tuple
import torch
# CPU-friendly models (small instruct models, kept tiny to save memory)
# Fixed: use a dict (display name -> model id) instead of a set
MODELS = {
    "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B-Instruct",
    "Qwen2.5-1.5B": "Qwen/Qwen2.5-1.5B-Instruct",
}
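
# To try another small CPU-friendly model, add its Hugging Face id above,
# e.g. (assumption: the id is available and fits in the Space's memory):
#   "TinyLlama-1.1B": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",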

def load_model(model_key: str):
    model_id = MODELS[model_key]
    print(f"🚀 Loading {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    pipe = pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        torch_dtype=torch.float32,  # float32 for CPU inference
        device_map=None,            # no device map -> runs on CPU
        max_new_tokens=128,         # default cap; respond() overrides this per call
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
        trust_remote_code=True,
        # Memory optimization: must go through model_kwargs so it reaches
        # from_pretrained instead of leaking into the generate() kwargs
        model_kwargs={"low_cpu_mem_usage": True},
    )
    print(f"✅ {model_id} loaded!")
    return pipe

# Cache of loaded pipelines so each model is loaded at most once per process
model_cache = {}
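
# Optional pre-warm so the first request doesn't pay the model-loading cost.
# A minimal sketch (assumption: the host has spare RAM at startup):
#   model_cache["Qwen2.5-0.5B"] = load_model("Qwen2.5-0.5B")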

def respond(message: str,
            history: List[Dict[str, str]],
            model_key: str,
            system_prompt: str) -> Tuple[List[Dict[str, str]], str, Dict[str, Any]]:
    try:
        if model_key not in model_cache:
            model_cache[model_key] = load_model(model_key)
        pipe = model_cache[model_key]
        print(f"🚀 Generating: {model_key}, Msg='{message[:30]}...'")
        messages = []
        if system_prompt.strip():
            messages.append({"role": "system", "content": system_prompt})
        for msg in history:
            messages.append({"role": msg["role"], "content": msg["content"]})
        messages.append({"role": "user", "content": message})
        tokenizer = pipe.tokenizer
        # Render the chat into the model's expected prompt format
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # max_new_tokens=512 here overrides the pipeline's default of 128
        outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
        # generated_text includes the prompt, so slice it off
        bot_reply = outputs[0]["generated_text"][len(prompt):].strip()
        print(f"✅ Reply: {bot_reply[:50]}...")
        new_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": bot_reply}]
        return new_history, "", gr.update(value="")
    except Exception as e:
        error_msg = f"❌ {model_key}: {str(e)}"
        print(f"💥 {error_msg}")
        new_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": error_msg}]
        return new_history, error_msg, gr.update(value="")
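
# Optional: on long chats, CPU latency grows with prompt length. One way to
# bound it is to trim old turns before building the prompt; a minimal sketch
# (MAX_TURNS is a hypothetical knob, not part of the original app):
#   MAX_TURNS = 10
#   history = history[-MAX_TURNS * 2:]  # keep the last 10 user/assistant pairs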

with gr.Blocks(title="🚀 Local HF Chat (on a weak CPU!)") as demo:
    gr.Markdown("# Local Inference (no API!)\n**Small models** answer in 1-3 s on CPU; large ones take a very long time. No rate limits or tokens. Provided as an example.")
    with gr.Row():
        # Fixed: the initial value must be a key from the MODELS dict
        model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-0.5B", label="🧠 Model")
        system_prompt = gr.Textbox(label="📝 System", placeholder="You are a cheerful AI.", lines=2)
    # type="messages" matches the role/content dicts built in respond()
    chatbot = gr.Chatbot(height=500, label="Chat", type="messages")
    with gr.Row():
        msg_input = gr.Textbox(placeholder="Hi! (Enter)", show_label=False, lines=1)
        send_btn = gr.Button("📤 Send", variant="primary")
    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear")
        retry_btn = gr.Button("🔄 Retry")
    status = gr.Textbox(label="Logs", interactive=False, lines=4)
    send_btn.click(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])
    msg_input.submit(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])

    def clear():
        return [], "", gr.update(value="")
    clear_btn.click(clear, outputs=[chatbot, status, msg_input])

    def retry(history: List[Dict[str, str]]):
        # Put the most recent user message back into the input box
        if history:
            last_user_msg = None
            for msg in reversed(history):
                if msg["role"] == "user":
                    last_user_msg = msg["content"]
                    break
            return last_user_msg if last_user_msg else ""
        return ""
    retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])

if __name__ == "__main__":
    demo.queue(max_size=10).launch(debug=True, ssr_mode=False)
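
# To run locally (assumption: the usual dependencies are installed):
#   pip install gradio transformers torch
#   python app.py
# Gradio serves on http://127.0.0.1:7860 by default.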