Spaces:

chenguittiMaroua
/

asm-app

Sleeping

App Files Community

chenguittiMaroua committed on Apr 26

Commit

2e8f5e6

verified ·

1 Parent(s): cbf5a05

Update main.py

Browse files

Files changed (1) hide show

main.py +57 -85

main.py CHANGED Viewed

@@ -132,78 +132,44 @@ def get_summarizer():
 MODEL_CHOICES = [
-    "google/flan-t5-small",  # ~300MB (English)
-    "google/flan-t5-base",   # ~900MB (English)
-    "cmarkea/flan-t5-base-fr"  # French-optimized
 ]
-class QAService:
-    def __init__(self):
-        self.model = None
-        self.tokenizer = None
-        self.model_name = None
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-    def initialize(self):
-        """Initialize with fallback support"""
-        for model_name in MODEL_CHOICES:
-            try:
-                logger.info(f"Loading {model_name}")
-                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-                self.model = AutoModelForSeq2SeqLM.from_pretrained(
-                    model_name,
-                    device_map="auto",
-                    torch_dtype=torch.float16 if "cuda" in self.device else torch.float32
-                )
-                self.model_name = model_name
-                logger.info(f"Successfully loaded {model_name} on {self.device}")
-                return True
-            except Exception as e:
-                logger.warning(f"Failed to load {model_name}: {str(e)}")
-                continue
-        logger.error("All models failed to load")
-        return False
-    def generate_answer(self, question: str, context: Optional[str] = None):
-        """Generate answer with proper text generation parameters"""
         try:
-            input_text = f"question: {question}"
-            if context:
-                input_text += f" context: {context[:2000]}"  # Limit context size
-            inputs = self.tokenizer(
-                input_text,
-                return_tensors="pt",
-                truncation=True,
-                max_length=512
-            ).to(self.device)
-            outputs = self.model.generate(
-                **inputs,
-                max_new_tokens=150,
-                num_beams=3,
-                early_stopping=True,
-                temperature=0.7,
-                repetition_penalty=2.5,
-                no_repeat_ngram_size=3
             )
-            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
         except Exception as e:
-            logger.error(f"Generation failed: {str(e)}")
-            raise
-# Initialize service
-qa_service = QAService()
 @app.on_event("startup")
 async def startup_event():
-    if not qa_service.initialize():
-        logger.error("QA service failed to initialize")
@@ -903,59 +869,65 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
 from typing import Optional
 @app.post("/qa")
-async def handle_qa(
     question: str = Form(...),
-    file: Optional[UploadFile] = File(None),
-    language: str = Form("fr")
 ):
-    """Handle QA requests with file upload support"""
-    if not qa_service.model:
         raise HTTPException(
-            503,
             detail={
-                "error": "Service unavailable",
                 "supported_models": MODEL_CHOICES,
-                "suggestion": "Try again later or contact support"
             }
         )
     try:
-        # Validate question
-        if not question.strip():
-            raise HTTPException(400, "Question cannot be empty")
-        # Process file if provided
         context = None
         if file:
             try:
-                file_ext, content = await process_uploaded_file(file)
-                context = extract_text(content, file_ext)
-                context = re.sub(r'\s+', ' ', context).strip()[:2000]  # Clean and limit
             except HTTPException:
                 raise
             except Exception as e:
                 logger.error(f"File processing failed: {str(e)}")
                 raise HTTPException(422, "File processing error")
-        # Generate answer
         try:
-            answer = qa_service.generate_answer(question, context)
             return {
                 "question": question,
-                "answer": answer,
-                "model": qa_service.model_name,
-                "context_used": context is not None,
-                "language": language
             }
         except Exception as e:
-            logger.error(f"Answer generation failed: {str(e)}")
             raise HTTPException(
-                500,
                 detail={
                     "error": "Answer generation failed",
-                    "model": qa_service.model_name,
                     "suggestion": "Try simplifying your question or reducing document size"
                 }
             )

 MODEL_CHOICES = [
+    "mrm8488/t5-base-finetuned-question-generation-ap",  # Small QA model (140MB)
+    "google/flan-t5-small",  # Official small model (300MB)
+    "hello-simpleai/chatbot"  # Very small fallback
 ]
+qa_pipeline = None
+current_model = None
+def initialize_qa():
+    global qa_pipeline, current_model
+    # Try each model in order
+    for model_name in MODEL_CHOICES:
         try:
+            logger.info(f"Attempting to load {model_name}")
+            qa_pipeline = pipeline(
+                "text2text-generation",
+                model=model_name,
+                device=0 if torch.cuda.is_available() else -1,
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
             )
+            current_model = model_name
+            logger.info(f"Successfully loaded {model_name}")
+            return True
         except Exception as e:
+            logger.warning(f"Failed to load {model_name}: {str(e)}")
+            continue
+    logger.error("All model loading attempts failed")
+    return False
 @app.on_event("startup")
 async def startup_event():
+    if not initialize_qa():
+        logger.error("QA system failed to initialize")
 from typing import Optional
 @app.post("/qa")
+async def question_answering(
     question: str = Form(...),
+    file: Optional[UploadFile] = File(None)
 ):
+    """Handle QA requests with optional file context"""
+    if qa_pipeline is None:
         raise HTTPException(
+            status_code=503,
             detail={
+                "error": "QA system unavailable",
+                "status": "No working model could be loaded",
                 "supported_models": MODEL_CHOICES,
+                "recovery_suggestion": "Please try again later"
             }
         )
     try:
+        # Process input
         context = None
         if file:
             try:
+                _, content = await process_uploaded_file(file)
+                context = extract_text(content, file.filename.split('.')[-1])
+                context = re.sub(r'\s+', ' ', context).strip()[:1000]  # Clean and limit context
             except HTTPException:
                 raise
             except Exception as e:
                 logger.error(f"File processing failed: {str(e)}")
                 raise HTTPException(422, "File processing error")
+        # Generate response
         try:
+            input_text = f"question: {question}"
+            if context:
+                input_text += f" context: {context}"
+            result = qa_pipeline(
+                input_text,
+                max_length=100,
+                num_beams=2,
+                temperature=0.7,
+                repetition_penalty=2.0,
+                no_repeat_ngram_size=3
+            )
             return {
                 "question": question,
+                "answer": result[0]["generated_text"],
+                "model": current_model,
+                "context_used": context is not None
             }
         except Exception as e:
+            logger.error(f"Generation failed: {str(e)}")
             raise HTTPException(
+                status_code=500,
                 detail={
                     "error": "Answer generation failed",
+                    "model": current_model,
                     "suggestion": "Try simplifying your question or reducing document size"
                 }
             )