Spaces:

MoAmir
/

Arabic-Toxicity-Detection

Sleeping

App Files Files Community

Arabic-Toxicity-Detection / app.py

MoAmir

Update app.py

7541872 verified 24 days ago

raw

history blame contribute delete

2.91 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch
	import torch.nn.functional as F
	import re
	import os

	# --- 1. Load the model ---
	# Prefer the checkpoint stored next to this file; if it cannot be loaded,
	# fall back to the public MARBERTv2 checkpoint so the app can still start.
	model_path = "."
	print("Loading model...")
	try:
	    tokenizer = AutoTokenizer.from_pretrained(model_path)
	    model = AutoModelForSequenceClassification.from_pretrained(model_path)
	except Exception as e:
	    # Deliberately broad: any failure to read the local checkpoint
	    # should trigger the fallback instead of crashing at startup.
	    print(f"Error loading from local: {e}")
	    # Make the degraded mode visible in the logs: the fallback is the base
	    # checkpoint, so its 5-way classification head is not the fine-tuned one.
	    print(
	        "WARNING: falling back to the base UBC-NLP/MARBERTv2 checkpoint; "
	        "its 5-label classification head is not the fine-tuned one, "
	        "so predictions will be unreliable."
	    )
	    tokenizer = AutoTokenizer.from_pretrained("UBC-NLP/MARBERTv2")
	    model = AutoModelForSequenceClassification.from_pretrained("UBC-NLP/MARBERTv2", num_labels=5)

	# --- 2. Arabic display names ---
	# Display names listed in class-index order: the entry at position i becomes
	# MY_LABELS[i].
	MY_LABELS = dict(enumerate([
	    "مسيء / كراهية (Hate)",
	    "هجومي (Offensive)",
	    "عادي / محايد (Neutral)",
	    "إهانة (Insult)",
	    "تهديد (Threat)",
	]))

	# --- 3. Text cleaning function ---
	def clean_text(text):
	    """Normalize Arabic text with a fixed sequence of regex substitutions.

	    Returns an empty string for any falsy input. Otherwise the rules below
	    are applied strictly in the listed order and the result is returned.
	    """
	    if not text:
	        return ""

	    # (pattern, replacement) pairs; order matters because later rules see
	    # the output of earlier ones.
	    rules = (
	        (r'[\u064B-\u0652]', ''),    # drop diacritics U+064B..U+0652
	        (r'[أإآ]', 'ا'),              # unify hamza/madda alef forms to bare alef
	        (r'ى', 'ي'),                 # alef maqsura -> yeh
	        (r'ة', 'ه'),                 # teh marbuta -> heh
	        (r'(.)\1+', r'\1'),          # collapse any run of a repeated character to one
	        (r'[^\u0621-\u064A\u0660-\u0669\s]', ''),  # keep only U+0621..U+064A, U+0660..U+0669, whitespace
	    )

	    normalized = text
	    for pattern, replacement in rules:
	        normalized = re.sub(pattern, replacement, normalized)
	    return normalized

	# --- 4. Prediction function ---
	def classify_text(text):
	    """Classify ``text`` and return a score for each label.

	    The text is normalized with ``clean_text``, tokenized (truncated to
	    128 tokens), and run through the model without gradient tracking. A
	    softmax over the logits of the single input gives one score per class.

	    Returns:
	        A dict mapping each display name from ``MY_LABELS`` (or
	        ``"Class <i>"`` for an index missing from it) to a float score.
	        An empty dict is returned when ``text`` is falsy or when nothing
	        but whitespace is left after cleaning.
	    """
	    if not text:
	        return {}

	    cleaned = clean_text(text)
	    if not cleaned.strip():
	        # Cleaning removed everything (e.g. the input had no Arabic
	        # characters); scoring an empty string would produce an arbitrary
	        # distribution, so treat it like empty input.
	        return {}

	    inputs = tokenizer(cleaned, return_tensors="pt", padding=True, truncation=True, max_length=128)

	    with torch.no_grad():
	        logits = model(**inputs).logits

	    # Index 0 selects the only sequence in the batch.
	    probs = F.softmax(logits, dim=-1)[0].tolist()

	    return {
	        MY_LABELS.get(i, f"Class {i}"): float(score)
	        for i, score in enumerate(probs)
	    }

	# --- 5. Interface with examples ---
	# Examples that will appear below the interface, one row per sample input.
	_example_rows = [
	    ["شكرا يا ذوق على كلامك الجميل"],  # neutral example
	    ["يا ابن الكلب يا حيوان"],  # insult example
	    ["والله لاجي اكسرلك البيت فوق دماغك"],  # threat example
	    ["ايه القرف والزبالة اللي انت بتقولها دي"],  # offensive example
	    ["الستات مكانهم المطبخ وبس"],  # hate example
	]

	# Input and output components, built separately for readability.
	_text_input = gr.Textbox(label="أدخل النص هنا", placeholder="اكتب جملة باللهجة المصرية...")
	_label_output = gr.Label(label="النتيجة")

	iface = gr.Interface(
	    fn=classify_text,
	    inputs=_text_input,
	    outputs=_label_output,
	    title="نظام اكتشاف الكلام المسيء (Arabic Toxicity Detection)",
	    description="نظام ذكاء اصطناعي لتصنيف التعليقات المصرية (عادي، شتيمة، تهديد، إلخ). اضغط على الأمثلة بالأسفل للتجربة.",
	    examples=_example_rows,
	)

	# Start the app as soon as the module is executed.
	iface.launch()