Spaces:

casamN
/

NRF_LLM

Running

App Files Files Community

casamN committed on Oct 31

Commit

1aadc7b

verified ·

1 Parent(s): c3483ff

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -7

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import os
 import gradio as gr
 import fitz
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
@@ -21,22 +22,98 @@ from PyPDF2 import PdfReader
 # ============================================
 # SECTION 1 — SIMPLE DICTIONARY TRANSLATOR
 # ============================================
 kiembu_to_english = {
     "Uvoro": "how are you", "Ri?": "When?", "Ku?": "Where?", "Uka": "come",
     "Hava": "here", "Varia": "there", "Vakuve": "close", "Kuraca": "far",
     "Ciakwa": "my/mine", "Cucu": "grandmother", "Mundu": "person",
     "Andu": "people", "Mwana": "child", "Mutumia": "woman", "Muthuri": "man",
-    "Ngai": "God", "Wendo": "love", "Ngui": "dog", "Nyomba": "house", "Ndawa": "medicine"
 }
-english_to_kiembu = {v.lower(): k for k, v in kiembu_to_english.items()}
 def translate_word(word, direction):
     if direction == "Kiembu → English":
-        return kiembu_to_english.get(word, "Not found in dictionary")
     else:
-        return english_to_kiembu.get(word.lower(), "Not found in dictionary")
 # ============================================
@@ -309,8 +386,9 @@ def build_app():
                 aims to preserve and promote indigenous linguistic heritage through advanced AI translation tools.
                 - **Languages Supported:** Kiembu ↔ English
-                - **Core Engine:** NRF LLM Model (based on lightweight MarianMT)
-                - **Developed by:** Casam Njagi Nyaga
                 - **Funding Agency:** National Research Fund (NRF), Kenya
                 - **Objective:** Foster inclusion of native languages in AI-driven communication.
                 """)

 import gradio as gr
 import fitz
 import faiss
+import re
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 # ============================================
 # SECTION 1 — SIMPLE DICTIONARY TRANSLATOR
 # ============================================
+# ===========================================
+# Kiembu ↔ English Dictionary (Case & Punctuation Insensitive)
+# ===========================================
 kiembu_to_english = {
+    # Existing entries
     "Uvoro": "how are you", "Ri?": "When?", "Ku?": "Where?", "Uka": "come",
     "Hava": "here", "Varia": "there", "Vakuve": "close", "Kuraca": "far",
     "Ciakwa": "my/mine", "Cucu": "grandmother", "Mundu": "person",
     "Andu": "people", "Mwana": "child", "Mutumia": "woman", "Muthuri": "man",
+    "Ngai": "God", "Wendo": "love", "Ngui": "dog", "Nyomba": "house", "Ndawa": "medicine",
+    "Maaĩ": "water", "Mwaki": "fire", "Rĩũa": "sun", "Mweri": "moon", "Njata": "star",
+    "ĩthiga": "stone", "Mũtĩ": "tree", "ĩthangũ": "leaf", "Mũri": "root", "ĩkoro": "bark",
+    "MũndũMũrume": "man", "MũndũMũka": "woman", "fafa": "father", "Mami": "mother",
+    "Gũtũ": "ear", "Ritho": "eye", "ĩniũrũ": "nose", "Kanyua": "mouth", "ĩgego": "tooth",
+    "rũrĩmĩ": "tongue", "Njara": "hand", "Kũgũrũ": "foot", "Thakame": "blood", "ĩvĩndĩ": "bone",
+    "Ngothi": "skin", "Nyama": "meat", "Nthamaki": "fish", "Giconi": "bird", "ĩtumbĩ": "egg",
+    "Nvĩa": "horn", "Mũkia": "tail", "ĩvuta": "feather", "Njuĩrĩ": "hair", "Kĩongo": "head",
+    "Ngingo": "neck", "Mũrukuthu": "back", "Ngoro": "heart", "Itema": "liver", "nyua": "drink",
+    "ria": "eat", "mama": "sleep", "kua": "die", "ũka": "come", "ona": "see", "ĩgua": "hear",
+    "menya": "know", "ĩciria": "think", "uga": "say", "ĩmwe": "one", "ĩgarĩ": "two",
+    "ĩthatũ": "three", "ĩnya": "four", "ĩthano": "five", "ithathatu": "six", "mugwanja": "seven",
+    "inyanya": "eight", "kenda": "nine", "ĩkumi": "ten", "nene": "big", "nini": "small",
+    "ndaca": "long", "nguvi": "short", "mbega": "good", "njũku": "bad", "mbĩcuru": "full",
+    "ĩtikĩndu": "empty", "nviũ": "hot", "nvoru": "cold", "ũtukũ": "night", "Mũthenya": "day",
     # NOTE(review): the key "nthĩ" ("earth") on the next line appears again
     # further down with the value "floor". A dict literal keeps only the last
     # value given for a duplicate key, so the "earth" gloss is lost at runtime.
+    "Mbura": "rain", "rũkũngi": "wind", "nthĩ": "earth", "kĩrĩma": "mountain", "rũnjĩ": "river",
     # NOTE(review): likewise "ĩria" ("lake/sea") on the next line is repeated
     # further down as "milk"; only "milk" survives. Confirm with a speaker how
     # both senses should be represented before changing either entry.
+    "ĩria": "lake/sea", "cumbĩ": "salt", "mũthanga": "sand", "ndogo": "smoke", "nyaki": "grass",
+    "njira": "path", "kivaro": "field", "kuraca": "far", "vakuvi": "near", "ava": "here",
+    "varia": "there", "ũũ": "who", "ndwi": "what", "kũ": "where", "rĩ": "when", "atia": "how",
+    "ka": "not", "onthe": "all", "engĩ": "many", "anini": "few", "jerũ": "new", "ngũrũ": "old",
+    "kĩthũrũrũ": "round", "kaũgĩ": "sharp", "ritwa": "name", "tirama": "stand", "ĩkara": "sit",
+    "thiĩ": "walk", "ngaria": "run", "va": "give", "oca": "take", "nyita": "hold",
+    "tiniaa": "cut", "ringa": "hit", "ikia": "throw", "via": "burn", "ĩthambĩra": "swim",
+    "ina": "sing", "katika": "dance", "theka": "laugh", "rĩra": "cry", "rũma": "bite",
+    "mumunya": "suck", "nungira": "smell", "ĩtigĩra": "fear", "wina toro": "sleepy",
+    "mũvũtu": "hungry", "mũnyondu": "thirsty", "ndune": "red", "njerũ": "white",
+    "mbirũ": "black", "ngirini": "green", "yellũ": "yellow", "mbulu": "blue",
+    "matu": "cloud", "maturĩ": "sky", "rũkũngũ": "dust", "mũu": "ashes", "mũkanda": "rope",
+    "kamũti": "stick", "kaviũ": "knife", "ũta": "bow", "mũgwi": "arrow", "itumũ": "spear",
+    "gitegithamaki": "fishhook", "neti": "net", "ĩtaru": "canoe", "mũrango": "door",
+    "ĩtara": "roof", "nthĩ": "floor", "Mũgeka": "mat", "kĩtanda": "bed", "mũrengeti": "blanket",
+    "nyũngũ": "pot", "kanya": "calabash", "gĩkapũ": "basket", "nduramu": "drum",
+    "rwĩmbo": "song", "rũgano": "story", "thakania": "play", "mũrata": "friend",
+    "nthũ": "enemy", "civũ": "chief", "mũkũrũ": "elder", "ĩria": "milk", "ngombe": "cow/cattle",
+    "mbũri": "goat", "ngondu": "sheep", "ngũkũ": "chicken", "ngamĩra": "camel",
+    "nvuda": "donkey", "ndegwa": "ox", "mbegũ": "seed", "ketha": "harvest", "ũma": "hoe",
+    "ĩthanwa": "axe", "mũro": "digging stick", "Mũtumi ciodo": "weaver",
+    "Mwaki nyũngũ": "potter", "mũturi": "blacksmith", "mũgwĩmi": "hunter",
+    "mũrĩthi": "herdsman", "mũteginthamaki": "fisherman", "thoko": "market",
+    "kwendia": "trade", "cenjania": "barter", "mathaa": "time", "mavinda": "season",
+    "ĩvinda rĩa riũa": "dry season", "ivinda ria mbura": "rainy season", "rĩũra": "famine",
+    "thayũ": "peace", "mbara": "war", "gũrana": "marriage", "mũviki": "bride",
+    "mũvikania": "groom", "ĩrua": "initiation", "kũrua": "circumcision",
+    "kĩkuũ": "death", "ngoma": "spirit", "ngomi": "ancestor", "mũgĩmbĩ": "finger millet",
+    "mũkombi": "pearl millet", "mwere": "bulrush millet", "mũvia": "sorghum",
+    "mbembe": "maize", "minji": "cowpea", "ndengũ": "green gram", "njavĩ": "pigeon pea",
+    "ndũma": "arrowroot/taro", "mwanga": "cassava", "gĩkũa": "yam", "ngwacĩ": "sweet potato",
+    "ĩrenge": "pumpkin", "sukuma": "kale", "terere": "amaranth", "Thageti": "spider plant",
+    "kaũrũra": "pumpkin leaves", "kunde": "cowpea leaves", "Mabuyu": "baobab fruit",
+    "nthithi": "tamarind", "mbera": "guava", "matimoko": "custard apple/soursop",
+    "macuca": "loquat", "kĩgwa": "sugarcane", "njahĩ": "sesame", "Marũrũ": "sunflower",
+    "mbiringanya": "eggplant", "nyanya": "tomato", "gĩtũngũrũ": "onion",
+    "kĩtũngũrũ saumu": "garlic", "tangauthi": "ginger", "murende": "turmeric",
+    "nduru": "chili", "mboga": "cabbage", "karati": "carrot", "njukĩ": "bee", "ukĩ": "honey",
+    "mwatu": "beehive", "mabaki": "wax", "maguta": "butter", "kĩrimũ": "cream",
+    "alenya": "ghee", "ĩria ra kũgandithua": "sour milk"
 }
+# --- Helper: Normalize user input ---
def normalize(text):
    """Return a canonical lookup key for *text*.

    The result is lowercase, Unicode-composed (NFC), free of the punctuation
    characters ``. , ? ! -``, and has no leading, trailing or repeated
    whitespace. Dictionary keys and user input are both passed through this
    function, so any two spellings that normalize to the same key match.

    Args:
        text: The word or phrase to clean. ``None`` is treated as empty;
            other non-string values are converted with ``str()``.

    Returns:
        The cleaned key, or ``""`` when nothing is left.
    """
    # Local import keeps this helper self-contained; repeated imports are
    # served from the module cache.
    import unicodedata

    if text is None:
        return ""
    # Kiembu uses letters such as "ĩ" and "ũ", which can arrive either as one
    # code point or as a base letter plus a combining tilde. Compose after
    # lowercasing so both forms produce the same key.
    text = unicodedata.normalize("NFC", str(text).lower())
    text = re.sub(r"[.,?!-]", "", text)  # remove punctuation
    # Trim and collapse whitespace only after punctuation is gone, so that
    # input like "Ri ?" yields "ri" rather than "ri ".
    return " ".join(text.split())
# --- Prepare lookup tables (keys passed through normalize) ---
# Kiembu -> English, keyed by the normalized Kiembu headword.
kiembu_lower = {normalize(k): v for k, v in kiembu_to_english.items()}
# English -> Kiembu, keyed by the normalized English gloss. When several
# Kiembu words share one gloss, the entry listed last in kiembu_to_english wins.
english_lower = {normalize(v): k for k, v in kiembu_to_english.items()}
# Some glosses bundle alternatives with "/" (e.g. "my/mine", "cow/cattle").
# Register each alternative as its own key so a user can type just one of
# them. setdefault never replaces a mapping that already exists.
for _kiembu, _gloss in kiembu_to_english.items():
    if "/" not in _gloss:
        continue
    for _alternative in _gloss.split("/"):
        _key = normalize(_alternative)
        if _key:  # skip empty pieces such as a trailing "/"
            english_lower.setdefault(_key, _kiembu)
+# --- Translation Function ---
 def translate_word(word, direction):
+    """Translate a word between Kiembu and English, ignoring case & punctuation."""
+    cleaned = normalize(word)
     if direction == "Kiembu → English":
+        return kiembu_lower.get(cleaned, "Not found in dictionary")
+    elif direction == "English → Kiembu":
+        return english_lower.get(cleaned, "Not found in dictionary")
     else:
+        return "Invalid translation direction. Use 'Kiembu → English' or 'English → Kiembu'."
 # ============================================
                 aims to preserve and promote indigenous linguistic heritage through advanced AI translation tools.
                 - **Languages Supported:** Kiembu ↔ English
+                - **Core Engine:** NRF LLM Model under development
+                - **Principal Investigator:** Prof Lucy Kawira – Chuka University
+                - **Developed by:** Technical Team: Coordinator – Casam Njagi – Chuka University
                 - **Funding Agency:** National Research Fund (NRF), Kenya
                 - **Objective:** Foster inclusion of native languages in AI-driven communication.
                 """)