Spaces:

chenguittiMaroua
/

asm-app

Sleeping

App Files Community

chenguittiMaroua committed on Apr 25

Commit

7d57745

verified ·

1 Parent(s): bd5c109

Update main.py

Browse files

Files changed (1) hide show

main.py +28 -46

main.py CHANGED Viewed

@@ -108,7 +108,7 @@ app.add_middleware(
 # Constants
 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
 SUPPORTED_FILE_TYPES = {
-    "docx", "xlsx", "pptx", "pdf", "jpg", "jpeg", "png"
 }
 # Model caching
@@ -166,6 +166,10 @@ async def process_uploaded_file(file: UploadFile) -> Tuple[str, bytes]:
 def extract_text(content: bytes, file_ext: str) -> str:
     """Extract text from various file formats with enhanced Excel support"""
     try:
         if file_ext == "docx":
             doc = Document(io.BytesIO(content))
             return "\n".join(para.text for para in doc.paragraphs if para.text.strip())
@@ -801,69 +805,47 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
 @app.post("/qa")
-@limiter.limit("5/minute")
 async def question_answering(
-    request: Request,
-    file: UploadFile = File(...),
     question: str = Form(...),
-    language: str = Form("fr")
 ):
     try:
-        file_ext, content = await process_uploaded_file(file)
-        text = extract_text(content, file_ext)
         if not text.strip():
-            raise HTTPException(400, "No extractable text found")
         # Clean and truncate text
         text = re.sub(r'\s+', ' ', text).strip()[:5000]
-        # Theme detection
-        theme_keywords = ["thème", "sujet principal", "quoi le sujet", "theme", "main topic"]
-        if any(kw in question.lower() for kw in theme_keywords):
-            try:
-                summarizer = get_summarizer()
-                summary_output = summarizer(
-                    text,
-                    max_length=min(100, len(text)//4),
-                    min_length=30,
-                    do_sample=False,
-                    truncation=True
-                )
-                theme = summary_output[0].get("summary_text", text[:200] + "...")
-                return {
-                    "question": question,
-                    "answer": f"Le document traite principalement de : {theme}",
-                    "confidence": 0.95,
-                    "language": language
-                }
-            except Exception:
-                theme = text[:200] + ("..." if len(text) > 200 else "")
-                return {
-                    "question": question,
-                    "answer": f"D'après le document : {theme}",
-                    "confidence": 0.7,
-                    "language": language,
-                    "warning": "theme_summary_fallback"
-                }
         # Standard QA
         qa = get_qa_model()
         result = qa(question=question, context=text[:3000])
-        return {
-            "question": question,
-            "answer": result["answer"],
-            "confidence": result["score"],
-            "language": language
-        }
     except HTTPException:
         raise
     except Exception as e:
-        logger.error(f"QA processing failed: {str(e)}")
-        raise HTTPException(500, detail=f"Analysis failed: {str(e)}")
 @app.post("/visualize/natural")
 async def natural_language_visualization(
     file: UploadFile = File(...),

 # Constants
 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
 SUPPORTED_FILE_TYPES = {
+    "docx", "xlsx", "pptx", "pdf", "jpg", "jpeg", "png", "txt"
 }
 # Model caching
 def extract_text(content: bytes, file_ext: str) -> str:
     """Extract text from various file formats with enhanced Excel support"""
     try:
+        if file_ext == "txt":
+            # Decode plain text (handle encoding issues)
+            return content.decode("utf-8", errors="replace").strip()
         if file_ext == "docx":
             doc = Document(io.BytesIO(content))
             return "\n".join(para.text for para in doc.paragraphs if para.text.strip())
 @app.post("/qa")
 async def question_answering(
+    file: UploadFile = File(None),  # Make optional for plain text
     question: str = Form(...),
+    text_input: str = Form(None),  # Alternative to file upload
+    language: str = Form("en")
 ):
     try:
+        # Case 1: User uploaded a file
+        if file:
+            file_ext, content = await process_uploaded_file(file)
+            text = extract_text(content, file_ext)
+        # Case 2: User provided raw text
+        elif text_input:
+            text = text_input.strip()
+        else:
+            raise HTTPException(400, "Either a file or text input is required.")
         if not text.strip():
+            raise HTTPException(400, "No usable text found.")
         # Clean and truncate text
         text = re.sub(r'\s+', ' ', text).strip()[:5000]
+        # Theme detection (if question asks for topic)
+        if any(kw in question.lower() for kw in ["theme", "topic", "subject"]):
+            summarizer = get_summarizer()
+            summary = summarizer(text, max_length=100, min_length=30)[0]["summary_text"]
+            return {"answer": f"The main topic is: {summary}"}
         # Standard QA
         qa = get_qa_model()
         result = qa(question=question, context=text[:3000])
+        return {"answer": result["answer"], "confidence": result["score"]}
     except HTTPException:
         raise
     except Exception as e:
+        logger.error(f"QA failed: {str(e)}")
+        raise HTTPException(500, "Internal server error.")
 @app.post("/visualize/natural")
 async def natural_language_visualization(
     file: UploadFile = File(...),