MrAlexGov committed on
Commit 3fe28a6 · verified · 1 Parent(s): 330b1d3

Update app.py

Files changed (1)
  1. app.py +24 -19
app.py CHANGED
@@ -1,13 +1,11 @@
 import gradio as gr
 from transformers import pipeline, AutoTokenizer
-from typing import List, Tuple, Dict, Any
+from typing import List, Dict, Any, Tuple
 import torch
 
-# CPU models
+# CPU models (only one small model, to save memory)
 MODELS = {
-    "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B-Instruct",
-    "Qwen2.5-1.5B": "Qwen/Qwen2.5-1.5B-Instruct",
-    "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct"
+    "gpt2": "gpt2",  # use only GPT-2 to save memory
 }
 
 def load_model(model_key: str):
@@ -21,12 +19,15 @@ def load_model(model_key: str):
         "text-generation",
         model=model_id,
         tokenizer=tokenizer,
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-        device_map="auto" if torch.cuda.is_available() else None,
-        max_new_tokens=512,
+        torch_dtype=torch.float32,  # use float32 on CPU
+        device_map=None,  # explicitly run on CPU
+        max_new_tokens=128,  # even fewer tokens to save memory
         do_sample=True,
         temperature=0.7,
-        pad_token_id=tokenizer.eos_token_id
+        pad_token_id=tokenizer.eos_token_id,
+        # memory optimization parameters
+        low_cpu_mem_usage=True,
+        trust_remote_code=True
     )
     print(f"✅ {model_id} загружена!")
     return pipe
@@ -34,9 +35,9 @@ def load_model(model_key: str):
 model_cache = {}
 
 def respond(message: str,
-            history: List[Tuple[str, str]],
+            history: List[Dict[str, str]],
             model_key: str,
-            system_prompt: str) -> Tuple[List[Tuple[str, str]], str, Dict[str, Any]]:
+            system_prompt: str) -> Tuple[List[Dict[str, str]], str, Dict[str, Any]]:
     try:
         if model_key not in model_cache:
             model_cache[model_key] = load_model(model_key)
@@ -48,9 +49,8 @@ def respond(message: str,
         if system_prompt.strip():
             messages.append({"role": "system", "content": system_prompt})
 
-        for user_msg, bot_reply in history:
-            messages.append({"role": "user", "content": user_msg})
-            messages.append({"role": "assistant", "content": bot_reply})
+        for msg in history:
+            messages.append({"role": msg["role"], "content": msg["content"]})
 
         messages.append({"role": "user", "content": message})
 
@@ -62,20 +62,20 @@ def respond(message: str,
 
         print(f"✅ Ответ: {bot_reply[:50]}...")
 
-        new_history = history + [(message, bot_reply)]
+        new_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": bot_reply}]
         return new_history, "", gr.update(value="")
 
     except Exception as e:
         error_msg = f"❌ {model_key}: {str(e)}"
         print(f"💥 {error_msg}")
-        new_history = history + [(message, error_msg)]
+        new_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": error_msg}]
         return new_history, error_msg, gr.update(value="")
 
 with gr.Blocks(title="🚀 Локальный HF Чат (на слабом CPU!)") as demo:
     gr.Markdown("# Локальный Inference (без API!)\n**Маленькие модели** — 1-3 сек CPU. Большие думают ооочень долго. Нет limits/token. В качестве примера.")
 
     with gr.Row():
-        model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-0.5B", label="🧠 Модель")
+        model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="gpt2", label="🧠 Модель")
         system_prompt = gr.Textbox(label="📝 System", placeholder="Ты весёлый ИИ.", lines=2)
 
     chatbot = gr.Chatbot(height=500, label="Чат")  # ← no type argument
@@ -97,9 +97,14 @@ with gr.Blocks(title="🚀 Локальный HF Чат (на слабом CPU!)
         return [], "", gr.update(value="")
     clear_btn.click(clear, outputs=[chatbot, status, msg_input])
 
-    def retry(history: List[Tuple[str, str]]):
+    def retry(history: List[Dict[str, str]]):
         if history:
-            return history[-1][0]
+            last_user_msg = None
+            for msg in reversed(history):
+                if msg["role"] == "user":
+                    last_user_msg = msg["content"]
+                    break
+            return last_user_msg if last_user_msg else ""
         return ""
     retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])
110