import gradio as gr
from transformers import pipeline, AutoTokenizer
from typing import List, Tuple, Dict, Any
import torch

# CPU-friendly models
MODELS = {
    "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B-Instruct",
    "Qwen2.5-1.5B": "Qwen/Qwen2.5-1.5B-Instruct",
    "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct"
}
def load_model(model_key: str):
    model_id = MODELS[model_key]
    print(f"🚀 Loading {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    pipe = pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
    print(f"✅ {model_id} loaded!")
    return pipe
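
# Hypothetical quick check (not part of the app): load the smallest model once and
# run a short completion to confirm CPU inference works before wiring up the UI.
#   pipe = load_model("Qwen2.5-0.5B")
#   print(pipe("Hello, world!", max_new_tokens=20)[0]["generated_text"])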
model_cache = {}
def respond(message: str,
            history: List[Tuple[str, str]],
            model_key: str,
            system_prompt: str) -> Tuple[List[Tuple[str, str]], str, Dict[str, Any]]:
    try:
        if model_key not in model_cache:
            model_cache[model_key] = load_model(model_key)
        pipe = model_cache[model_key]
        print(f"🚀 Generating: {model_key}, Msg='{message[:30]}...'")
        messages = []
        if system_prompt.strip():
            messages.append({"role": "system", "content": system_prompt})
        for user_msg, bot_reply in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_reply})
        messages.append({"role": "user", "content": message})
        tokenizer = pipe.tokenizer
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
        bot_reply = outputs[0]["generated_text"][len(prompt):].strip()
        print(f"✅ Reply: {bot_reply[:50]}...")
        new_history = history + [(message, bot_reply)]
        return new_history, "", gr.update(value="")
    except Exception as e:
        error_msg = f"❌ {model_key}: {str(e)}"
        print(f"💥 {error_msg}")
        new_history = history + [(message, error_msg)]
        return new_history, error_msg, gr.update(value="")
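
# Hypothetical direct call (outside the Gradio UI), e.g. a quick smoke test of the
# chat flow; the empty list is an empty history, the last argument the system prompt.
#   history, status, _ = respond("Hi!", [], "Qwen2.5-0.5B", "You are a helpful assistant.")
#   print(history[-1][1])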
with gr.Blocks(title="🚀 Local HF Chat (even on a weak CPU!)") as demo:
    gr.Markdown("# Local Inference (no API!)\n**Small models** answer in 1-3 s on CPU; large ones take much longer. No rate limits or tokens. Provided as an example.")
    with gr.Row():
        model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-0.5B", label="🧠 Model")
        system_prompt = gr.Textbox(label="📝 System", placeholder="You are a cheerful AI.", lines=2)
    chatbot = gr.Chatbot(height=500, label="Chat")  # ← no `type` arg: history stays as (user, bot) tuples
    with gr.Row():
        msg_input = gr.Textbox(placeholder="Hi! (Enter)", show_label=False, lines=1)
        send_btn = gr.Button("📤 Send", variant="primary")
    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear")
        retry_btn = gr.Button("🔄 Retry")
    status = gr.Textbox(label="Logs", interactive=False, lines=4)

    send_btn.click(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])
    msg_input.submit(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])

    def clear():
        return [], "", gr.update(value="")

    clear_btn.click(clear, outputs=[chatbot, status, msg_input])

    def retry(history: List[Tuple[str, str]]):
        if history:
            return history[-1][0]
        return ""

    retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])
if __name__ == "__main__":
    demo.queue(max_size=10).launch(debug=True, ssr_mode=False)
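
# To run this locally (assumed setup, not stated in the original): save the file as
# app.py, install the dependencies, and start the script. `accelerate` is only needed
# when a GPU is present (for device_map="auto"); on CPU-only machines it can be skipped.
#
#   pip install gradio transformers torch accelerate
#   python app.py
#
# Gradio serves the UI at http://127.0.0.1:7860 by default.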