Spaces:

MoAmir
/

Arabic-Toxicity-Detection

Sleeping

App Files Community

MoAmir committed 23 days ago

Commit

7541872

verified ·

1 Parent(s): f4a8eb8

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -30

app.py CHANGED Viewed

@@ -3,31 +3,27 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import torch.nn.functional as F
 import re
-import json
-# --- 1. تحميل الموديل (إجباري من نفس المكان) ---
-# مش هنحط try/except عشان نتأكد انه بيقرأ ملفاتك انت
 model_path = "."
-print("Loading model from current directory...")
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForSequenceClassification.from_pretrained(model_path)
-# --- 2. قراءة ترتيب الكلاسات من ملف config.json ---
-# دي أضمن طريقة عشان الترتيب يطلع زي ما اتدرب بالظبط
-with open('config.json', 'r') as f:
-    config = json.load(f)
-    id2label = config.get('id2label')
-# لو الترتيب مش موجود في الملف، هنستخدم الترتيب الافتراضي (تأكد انه مناسب ليك)
-if not id2label:
-    id2label = {
-        "0": "مسيء / كراهية (Hate)",
-        "1": "هجومي (Offensive)",
-        "2": "عادي / محايد (Neutral)",
-        "3": "إهانة (Insult)",
-        "4": "تهديد (Threat)"
-    }
 # --- 3. دالة التنضيف ---
 def clean_text(text):
@@ -40,7 +36,7 @@ def clean_text(text):
     text = re.sub(r'[^\u0621-\u064A\u0660-\u0669\s]', '', text)
     return text
-# --- 4. التنبؤ ---
 def classify_text(text):
     if not text: return {}
     cleaned = clean_text(text)
@@ -53,19 +49,27 @@ def classify_text(text):
     results = {}
     for i, score in enumerate(probs):
-        # بنجيب الاسم الصح بناء على رقم الكلاس
-        label = id2label.get(str(i), f"Class {i}")
-        results[label] = float(score)
     return results
-# --- 5. الواجهة ---
 iface = gr.Interface(
     fn=classify_text,
-    inputs=gr.Textbox(label="اكتب النص"),
     outputs=gr.Label(label="النتيجة"),
-    title="Arabic Toxicity Detection",
-    description="تجربة النظام (يجب أن تكون الملفات pytorch_model.bin و config.json موجودة)."
 )
 iface.launch()

 import torch
 import torch.nn.functional as F
 import re
+import os
+# --- 1. تحميل الموديل ---
 model_path = "."
+print("Loading model...")
+try:
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
+    model = AutoModelForSequenceClassification.from_pretrained(model_path)
+except Exception as e:
+    print(f"Error loading from local: {e}")
+    tokenizer = AutoTokenizer.from_pretrained("UBC-NLP/MARBERTv2")
+    model = AutoModelForSequenceClassification.from_pretrained("UBC-NLP/MARBERTv2", num_labels=5)
+# --- 2. الأسماء العربي ---
+MY_LABELS = {
+    0: "مسيء / كراهية (Hate)",
+    1: "هجومي (Offensive)",
+    2: "عادي / محايد (Neutral)",
+    3: "إهانة (Insult)",
+    4: "تهديد (Threat)"
+}
 # --- 3. دالة التنضيف ---
 def clean_text(text):
     text = re.sub(r'[^\u0621-\u064A\u0660-\u0669\s]', '', text)
     return text
+# --- 4. دالة التنبؤ ---
 def classify_text(text):
     if not text: return {}
     cleaned = clean_text(text)
     results = {}
     for i, score in enumerate(probs):
+        label_name = MY_LABELS.get(i, f"Class {i}")
+        results[label_name] = float(score)
     return results
+# --- 5. الواجهة مع الأمثلة (Examples) ---
 iface = gr.Interface(
     fn=classify_text,
+    inputs=gr.Textbox(label="أدخل النص هنا", placeholder="اكتب جملة باللهجة المصرية..."),
     outputs=gr.Label(label="النتيجة"),
+    title="نظام اكتشاف الكلام المسيء (Arabic Toxicity Detection)",
+    description="نظام ذكاء اصطناعي لتصنيف التعليقات المصرية (عادي، شتيمة، تهديد، إلخ). اضغط على الأمثلة بالأسفل للتجربة.",
+    # --- هنا الأمثلة اللي هتظهر تحت ---
+    examples=[
+        ["شكرا يا ذوق على كلامك الجميل"],          # مثال عادي
+        ["يا ابن الكلب يا حيوان"],                 # مثال إهانة
+        ["والله لاجي اكسرلك البيت فوق دماغك"],     # مثال تهديد
+        ["ايه القرف والزبالة اللي انت بتقولها دي"], # مثال هجومي
+        ["الستات مكانهم المطبخ وبس"]               # مثال كراهية
+    ]
 )
 iface.launch()