MrAlexGov committed
Commit 330b1d3 · verified · 1 Parent(s): a8f8634

Update app.py

Files changed (1):
  1. app.py +16 -27
app.py CHANGED
@@ -1,9 +1,9 @@
 import gradio as gr
 from transformers import pipeline, AutoTokenizer
-from typing import List, Dict, Any, Tuple
+from typing import List, Tuple, Dict, Any
 import torch
 
-# CPU models (small, chat-ready)
+# CPU models
 MODELS = {
     "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B-Instruct",
     "Qwen2.5-1.5B": "Qwen/Qwen2.5-1.5B-Instruct",
@@ -11,7 +11,6 @@ MODELS = {
 }
 
 def load_model(model_key: str):
-    """Lazy load pipeline."""
     model_id = MODELS[model_key]
     print(f"🚀 Loading {model_id}...")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -32,14 +31,12 @@ def load_model(model_key: str):
     print(f"✅ {model_id} loaded!")
     return pipe
 
-# Global cache
 model_cache = {}
 
 def respond(message: str,
-            history: List[Dict[str, str]],
+            history: List[Tuple[str, str]],
             model_key: str,
-            system_prompt: str) -> Tuple[List[Dict[str, str]], str, Dict[str, Any]]:
-    """Local chat with pipeline."""
+            system_prompt: str) -> Tuple[List[Tuple[str, str]], str, Dict[str, Any]]:
     try:
         if model_key not in model_cache:
             model_cache[model_key] = load_model(model_key)
@@ -47,39 +44,33 @@ def respond(message: str,
 
         print(f"🚀 Generating: {model_key}, Msg='{message[:30]}...'")
 
-        # Chat format (system + history + user)
         messages = []
         if system_prompt.strip():
             messages.append({"role": "system", "content": system_prompt})
-        messages.extend(history)
+
+        for user_msg, bot_reply in history:
+            messages.append({"role": "user", "content": user_msg})
+            messages.append({"role": "assistant", "content": bot_reply})
+
         messages.append({"role": "user", "content": message})
 
-        # Apply chat template (for instruct)
         tokenizer = pipe.tokenizer
         prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
-        # Generate
         outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
         bot_reply = outputs[0]["generated_text"][len(prompt):].strip()
 
         print(f"✅ Reply: {bot_reply[:50]}...")
 
-        new_history = history + [
-            {"role": "user", "content": message},
-            {"role": "assistant", "content": bot_reply}
-        ]
+        new_history = history + [(message, bot_reply)]
         return new_history, "", gr.update(value="")
 
     except Exception as e:
         error_msg = f"❌ {model_key}: {str(e)}"
         print(f"💥 {error_msg}")
-        new_history = history + [
-            {"role": "user", "content": message},
-            {"role": "assistant", "content": error_msg}
-        ]
+        new_history = history + [(message, error_msg)]
         return new_history, error_msg, gr.update(value="")
 
-# UI - FIXED: removed theme and type, incompatible with Gradio 5
 with gr.Blocks(title="🚀 Local HF Chat (on a weak CPU!)") as demo:
     gr.Markdown("# Local Inference (no API!)\n**Small models**: 1-3 sec on CPU. Big ones think for a veeery long time. No limits/tokens. Just an example.")
 
@@ -87,8 +78,7 @@ with gr.Blocks(title="🚀 Local HF Chat (on a weak CPU!)
         model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-0.5B", label="🧠 Model")
         system_prompt = gr.Textbox(label="📝 System", placeholder="You are a cheerful AI.", lines=2)
 
-    # In Gradio 5+ gr.Chatbot always uses the messages format, the type parameter was removed
-    chatbot = gr.Chatbot(height=500, label="Chat")
+    chatbot = gr.Chatbot(height=500, label="Chat")  # without type
 
     with gr.Row():
         msg_input = gr.Textbox(placeholder="Hi! (Enter)", show_label=False, lines=1)
@@ -100,7 +90,6 @@ with gr.Blocks(title="🚀 Local HF Chat (on a weak CPU!)
 
     status = gr.Textbox(label="Logs", interactive=False, lines=4)
 
-    # Events
    send_btn.click(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])
     msg_input.submit(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])
 
@@ -108,11 +97,11 @@ with gr.Blocks(title="🚀 Local HF Chat (on a weak CPU!)
         return [], "", gr.update(value="")
     clear_btn.click(clear, outputs=[chatbot, status, msg_input])
 
-    def retry(history: List[Dict[str, str]]):
-        if len(history) >= 2 and history[-2]["role"] == "user":
-            return history[-2]["content"]
+    def retry(history: List[Tuple[str, str]]):
+        if history:
+            return history[-1][0]
         return ""
     retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])
 
 if __name__ == "__main__":
-    demo.queue(max_size=10).launch(debug=True)
+    demo.queue(max_size=10).launch(debug=True, ssr_mode=False)
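
A note on the data shape this commit moves to: Gradio's legacy tuple format stores Chatbot history as a list of `(user, assistant)` pairs, while the messages format used by the previous revision stores one dict per turn with `role`/`content` keys, which is also the shape `tokenizer.apply_chat_template` expects. A minimal round-trip sketch between the two (the helper names are hypothetical, not part of the app):

```python
from typing import Dict, List, Tuple

def tuples_to_messages(history: List[Tuple[str, str]]) -> List[Dict[str, str]]:
    """Expand (user, assistant) pairs into role/content dicts,
    mirroring the loop respond() now runs before templating."""
    messages: List[Dict[str, str]] = []
    for user_msg, bot_reply in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_reply})
    return messages

def messages_to_tuples(messages: List[Dict[str, str]]) -> List[Tuple[str, str]]:
    """Pair consecutive user/assistant dicts back into tuples."""
    pairs: List[Tuple[str, str]] = []
    pending_user = None
    for m in messages:
        if m["role"] == "user":
            pending_user = m["content"]
        elif m["role"] == "assistant" and pending_user is not None:
            pairs.append((pending_user, m["content"]))
            pending_user = None
    return pairs

assert messages_to_tuples(tuples_to_messages([("hi", "hello")])) == [("hi", "hello")]
```

The new `retry` relies on the same shape: `history[-1][0]` is simply the user half of the last pair.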
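For context on the `apply_chat_template` call: it renders the message list into the model-specific prompt string, and `add_generation_prompt=True` appends the opening tag of the assistant turn so generation continues as the assistant. For the Qwen2.5-Instruct models listed in `MODELS` the template is ChatML-style, so the output looks roughly like the commented sketch below (the exact tokens come from the tokenizer's bundled template):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
messages = [
    {"role": "system", "content": "You are a cheerful AI."},
    {"role": "user", "content": "Hi!"},
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# Roughly (ChatML-style):
# <|im_start|>system
# You are a cheerful AI.<|im_end|>
# <|im_start|>user
# Hi!<|im_end|>
# <|im_start|>assistant
```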
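Slicing `generated_text[len(prompt):]` works because the text-generation pipeline returns prompt plus continuation by default. An alternative sketch, using the pipeline's standard `return_full_text` flag so no slicing is needed:

```python
from transformers import pipeline

# Standalone sketch; mirrors the app's smallest model.
pipe = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct")
outputs = pipe(
    "Hi!",
    max_new_tokens=32,
    do_sample=True,
    temperature=0.7,
    return_full_text=False,  # drop the prompt from generated_text
)
print(outputs[0]["generated_text"].strip())
```

The other launch change, `ssr_mode=False`, opts the app out of Gradio 5's server-side rendering, which can misbehave in some hosting setups.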