Spaces:

chenguittiMaroua
/

asm-app

Sleeping

App Files Files Community

chenguittiMaroua committed on Apr 28

Commit

752eaa7

verified ·

1 Parent(s): e7a980f

Update main.py

Browse files

Files changed (1) hide show

main.py +46 -28

main.py CHANGED Viewed

@@ -449,7 +449,6 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
 from typing import Optional
 @app.post("/qa")
 async def question_answering(
     question: str = Form(...),
@@ -462,33 +461,10 @@ async def question_answering(
         # Process file with enhanced cleaning
         context = None
         if file:
-            file_ext = file.filename.split('.')[-1].lower()
             _, content = await process_uploaded_file(file)
-            # Handle image files differently
-            if file_ext in {"jpg", "jpeg", "png"}:
-                # First try OCR
-                try:
-                    image = Image.open(io.BytesIO(content))
-                    # Pre-process image for better OCR
-                    image = image.convert('L')  # Convert to grayscale
-                    text = pytesseract.image_to_string(image, config='--psm 6', lang='fra+eng')
-                    if text.strip():
-                        context = clean_and_translate_to_french(text)[:1200]
-                    else:
-                        # If OCR fails, try image captioning
-                        captioner = get_image_captioner()
-                        result = captioner(image)
-                        context = clean_and_translate_to_french(result[0]['generated_text'])[:1200]
-                except Exception as img_e:
-                    logger.error(f"Image processing failed: {str(img_e)}")
-                    raise HTTPException(422, "Could not extract text from image")
-            else:
-                # Handle non-image files as before
-                raw_text = extract_text(content, file_ext)
-                context = clean_and_translate_to_french(raw_text)[:1200]
-        # Rest of your QA processing remains the same...
         # Theme detection with strict French enforcement
         if "thème" in question.lower() or "theme" in question.lower():
             if not context:
@@ -535,12 +511,54 @@ async def question_answering(
             "context_used": context is not None
         }
-    except HTTPException:
-        raise
     except Exception as e:
         logger.error(f"Erreur: {str(e)}")
         raise HTTPException(500, "Erreur lors du traitement")

 from typing import Optional
 @app.post("/qa")
 async def question_answering(
     question: str = Form(...),
         # Process file with enhanced cleaning
         context = None
         if file:
             _, content = await process_uploaded_file(file)
+            raw_text = extract_text(content, file.filename.split('.')[-1])
+            context = clean_and_translate_to_french(raw_text)[:1200]  # New cleaning function
         # Theme detection with strict French enforcement
         if "thème" in question.lower() or "theme" in question.lower():
             if not context:
             "context_used": context is not None
         }
     except Exception as e:
         logger.error(f"Erreur: {str(e)}")
         raise HTTPException(500, "Erreur lors du traitement")
+# New helper functions
def clean_and_translate_to_french(text: str, max_length: int = 2000) -> str:
    """Strip page-number lines and translate a few English terms to French.

    This is a light, dictionary-based clean-up, not a real translation:
    only the terms listed in ``translations`` are rewritten.

    Args:
        text: Raw text extracted from an uploaded document. ``None`` or an
            empty string yields ``""``.
        max_length: Maximum number of characters to return (default 2000,
            the value that used to be hard-coded).

    Returns:
        The cleaned text, truncated to ``max_length`` characters.
    """
    if not text:
        return ""

    # Blank out lines that contain nothing but digits (typically page numbers).
    text = re.sub(r'^\s*\d+\s*$', '', text, flags=re.MULTILINE)

    # English -> French term table. Terms spelled identically in both
    # languages ("introduction", "section") need no entry.
    translations = {
        "welcome": "bienvenue",
        "chapter": "chapitre",
    }

    # Whole words only (so "unwelcome" is left alone), any letter case (so
    # the capitalised headings "Welcome" / "Chapter 1" are handled), with an
    # optional plural "s" that is carried over unchanged.
    term_pattern = re.compile(
        r"\b(" + "|".join(re.escape(term) for term in translations) + r")(s?)\b",
        re.IGNORECASE,
    )

    def _translate(match: "re.Match[str]") -> str:
        word, plural = match.group(1), match.group(2)
        french = translations[word.lower()]
        # Mirror the casing of the source word.
        if word.isupper():
            french = french.upper()
        elif word[0].isupper():
            french = french.capitalize()
        return french + plural

    return term_pattern.sub(_translate, text)[:max_length]
def generate_theme_answer(context: str) -> str:
    """Produce a one-sentence French theme statement for ``context``.

    Runs two generations through the module-level ``qa_pipeline``: first a
    keyword list, then a one-sentence summary seeded with those keywords.
    The returned theme is the first non-empty sentence found after the last
    colon of the generated summary.

    Args:
        context: Document text. Expected to be a non-empty string; the
            caller is presumed to check this beforehand (TODO confirm).

    Returns:
        The theme sentence, first letter upper-cased and terminated by a
        period, or ``""`` when the model produced no usable text.
    """
    # Step 1: ask the model for keywords (only the first 1000 chars are sent).
    topics_prompt = (
        "Liste 3-5 mots-clés en français représentant les sujets principaux "
        f"de ce texte:\n{context[:1000]}"
    )
    topics = qa_pipeline(topics_prompt, max_length=50)[0]["generated_text"]

    # Step 2: ask for a one-sentence summary, guided by those keywords.
    summary_prompt = (
        "Résume en une phrase en français pour un étudiant:\n"
        f"Mots-clés: {topics}\nTexte: {context[:800]}"
    )
    summary = qa_pipeline(summary_prompt, max_length=60)[0]["generated_text"]

    # Step 3: drop any "Label:" prefix the model echoed back, then keep the
    # first sentence that actually contains text. Skipping empty pieces
    # avoids returning a bare "." when the tail starts with a period.
    tail = summary.split(":")[-1]
    sentence = next(
        (piece.strip() for piece in tail.split(".") if piece.strip()),
        "",
    )
    if not sentence:
        return ""

    # Upper-case the first character only: str.capitalize() would also
    # lower-case the rest and destroy proper nouns and acronyms.
    return sentence[:1].upper() + sentence[1:] + "."
def validate_french_response(text: str) -> str:
    """Drop English sentences from ``text`` and tidy up what remains.

    Language detection is a cheap stop-word heuristic: a sentence is
    discarded when it contains more English function words than French
    ones. Sentences with no evidence either way are kept. The previous
    approach, which deleted every run of three or more ASCII letters,
    also removed most French words.

    Args:
        text: Model output to sanitise. ``None``, empty, or
            whitespace-only input yields ``""``.

    Returns:
        The remaining sentences joined by single spaces. The first
        character is upper-cased and the rest of the casing is preserved.
        If the result does not end with ``.``, ``!`` or ``?``, a dangling
        trailing fragment is trimmed back to the last terminator, or a
        period is appended when there is none. Returns ``""`` when nothing
        survives the filter.
    """
    if not text or not text.strip():
        return ""

    # Function words that are unambiguous for each language. Words that
    # exist in both ("a", "on", "or", "but", "me") are deliberately left out.
    english_markers = frozenset({
        "the", "and", "is", "are", "was", "were", "of", "to", "in", "it",
        "this", "that", "these", "those", "with", "which", "what", "from",
        "about", "for", "by", "as", "at", "be", "not", "has", "have",
        "will", "would", "can", "you", "your", "we", "they", "he", "she",
        "his", "her", "their", "there", "if",
    })
    french_markers = frozenset({
        "le", "la", "les", "un", "une", "des", "du", "de", "et", "est",
        "sont", "dans", "pour", "avec", "sur", "ce", "cette", "ces", "qui",
        "que", "au", "aux", "en", "par", "il", "elle", "ils", "elles",
        "nous", "vous", "ne", "pas", "se", "sa", "ses", "leur", "leurs",
        "à", "où", "l", "d", "qu",
    })

    def _looks_english(sentence: str) -> bool:
        # Letter-only tokens, so "l'écologie" yields "l" and "écologie".
        words = re.findall(r"[^\W\d_]+", sentence.lower())
        english_hits = sum(word in english_markers for word in words)
        french_hits = sum(word in french_markers for word in words)
        return english_hits > french_hits

    # Split after sentence-ending punctuation that is followed by whitespace.
    sentences = re.split(r"(?<=[.!?])\s+", text.strip())
    kept = [s for s in sentences if s and not _looks_english(s)]
    result = " ".join(kept).strip()
    if not result:
        return ""

    if not result.endswith(('.', '!', '?')):
        last_mark = max(result.rfind(mark) for mark in ".!?")
        if last_mark != -1:
            # Keep every complete sentence, drop the unfinished tail.
            result = result[:last_mark + 1]
        else:
            result += "."

    # Upper-case the first character only: str.capitalize() would also
    # lower-case proper nouns and acronyms in the rest of the text.
    return result[:1].upper() + result[1:]