Spaces:

Tongyi-MAI
/

Z-Image-Turbo

Running on Zero

QJerry committed 5 days ago

Commit

e89c9e8

verified ·

1 Parent(s): 6cc7643

Update prompt_check.py

Files changed (1) hide show

prompt_check.py CHANGED Viewed

@@ -1,35 +1,26 @@
-def is_unsafe_prompt(model, tokenizer, user_prompt):
-    prompt = f"""You are a content safety filter. Your task is to analyze the following user input and determine if it contains unsafe content.
-    Unsafe content includes:
-    1. **Sexually Explicit Material**: Descriptions of nudity or sexual acts.
-    2. **Sensitive Political Content**: References to real-world political figures (e.g., heads of state, government officials), political parties, sensitive geopolitical events, or propaganda.
-    Analyze the user input below.
-    If the input falls into EITHER of these categories, answer "yes".
-    If the input is safe, answer "no".
-    User Input: "{user_prompt}"
-    Answer (only yes or no):"""
-    messages = [
-        {"role": "user", "content": prompt}
-    ]
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True,
-        enable_thinking=False  # Switches between thinking and non-thinking modes. Default is True.
     )
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-    # conduct text completion
-    generated_ids = model.generate(
-        **model_inputs,
-        max_new_tokens=10
-    )
-    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
     content = tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")

+import re
+def clean_model_output(text):
+    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
+    text = re.sub(r"\n*(assistant|user)\n*", "", text)
+    text = re.sub(r"\n+", "\n", text).strip()
+    return text
+def is_unsafe_prompt(model, tokenizer, system_prompt=None, user_prompt=None, max_new_token=10):
+    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True,
+        enable_thinking=False,
     )
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    generated_ids = model.generate(**model_inputs, max_new_tokens=max_new_token)
+    output_ids = generated_ids[0][-max_new_token:].tolist()
     content = tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")