chenguittiMaroua commited on
Commit
cbf5a05
·
verified ·
1 Parent(s): ea0f16e

Update main.py

Browse files
Files changed (1):
  1. main.py +92 -47
main.py CHANGED
@@ -132,45 +132,85 @@ def get_summarizer():
132
 
133
 
134
MODEL_CHOICES = [
    "patrickvonplaten/t5-tiny-random",  # Tiny test model (always works)
    "google/flan-t5-small",             # 300MB
    "google/flan-t5-base",              # 900MB
    "facebook/bart-large-cnn",          # 1.6GB
]


class QAService:
    """Lazily-initialized question-answering service.

    ``initialize()`` walks ``MODEL_CHOICES`` in order until one checkpoint
    loads, so the app degrades gracefully when the larger models are
    unavailable (e.g. on a small CPU-only host).
    """

    def __init__(self):
        # Loaded transformers pipeline; stays None until initialize() succeeds.
        self.model = None
        # Name of the checkpoint that actually loaded.
        self.model_name = None
        # transformers pipeline device convention: 0 = first GPU, -1 = CPU.
        self.device = 0 if torch.cuda.is_available() else -1

    def initialize(self):
        """Try loading models until one succeeds.

        Returns:
            bool: True if some model loaded, False if every candidate failed.
        """
        for model_name in MODEL_CHOICES:
            try:
                logger.info(f"Attempting to load {model_name}")

                # Lightweight pipeline initialization; fp16 only on GPU.
                self.model = pipeline(
                    "text2text-generation",
                    model=model_name,
                    device=self.device,
                    torch_dtype=torch.float16 if self.device == 0 else torch.float32,
                )
                self.model_name = model_name
                logger.info(f"Successfully loaded {model_name}")
                return True

            except Exception as e:
                # Best-effort fallback: log and try the next candidate.
                logger.warning(f"Failed to load {model_name}: {str(e)}")
                continue

        logger.error("All model loading attempts failed")
        return False


# Global service instance shared by the request handlers.
qa_service = QAService()
173
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
 
176
 
@@ -863,55 +903,60 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
863
  from typing import Optional
864
 
865
  @app.post("/qa")
866
- async def handle_qa_request(
867
  question: str = Form(...),
868
- file: Optional[UploadFile] = File(None)
 
869
  ):
870
- # Initialize service if needed
871
  if not qa_service.model:
872
- if not qa_service.initialize():
873
- raise HTTPException(
874
- status_code=500,
875
- detail={
876
- "error": "System unavailable",
877
- "status": "Model initialization failed",
878
- "recovery_suggestion": "Retry in 30 seconds or contact support"
879
- }
880
- )
881
 
882
  try:
883
- # Process input
 
 
 
 
884
  context = None
885
  if file:
886
- file_ext, content = await process_uploaded_file(file)
887
- context = extract_text(content, file_ext)[:2000] # Strict limit
888
-
889
- # Generate response
 
 
 
 
 
 
 
890
  try:
891
- input_text = f"question: {question}" + (f" context: {context}" if context else "")
892
- result = qa_service.model(
893
- input_text,
894
- max_length=150,
895
- num_beams=2,
896
- early_stopping=True
897
- )
898
 
899
  return {
900
  "question": question,
901
- "answer": result[0]["generated_text"],
902
  "model": qa_service.model_name,
903
- "context_used": bool(context)
 
904
  }
905
 
906
  except Exception as e:
907
- logger.error(f"Generation failed: {str(e)}")
908
  raise HTTPException(
909
- status_code=500,
910
  detail={
911
  "error": "Answer generation failed",
912
  "model": qa_service.model_name,
913
- "input_size": len(input_text) if 'input_text' in locals() else None,
914
- "suggestion": "Simplify your question or reduce document size"
915
  }
916
  )
917
 
 
132
 
133
 
134
MODEL_CHOICES = [
    "google/flan-t5-small",     # ~300MB (English)
    "google/flan-t5-base",      # ~900MB (English)
    "cmarkea/flan-t5-base-fr",  # French-optimized
]


class QAService:
    """Seq2seq question-answering service with model fallback.

    ``initialize()`` walks ``MODEL_CHOICES`` until a checkpoint loads;
    ``generate_answer()`` runs deterministic beam-search generation over an
    optional document context.
    """

    def __init__(self):
        self.model = None       # AutoModelForSeq2SeqLM once loaded
        self.tokenizer = None   # matching AutoTokenizer
        self.model_name = None  # checkpoint that actually loaded
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def initialize(self):
        """Initialize with fallback support.

        Returns:
            bool: True on the first checkpoint that loads, False if all fail.
        """
        for model_name in MODEL_CHOICES:
            try:
                logger.info(f"Loading {model_name}")

                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
                self.model = AutoModelForSeq2SeqLM.from_pretrained(
                    model_name,
                    device_map="auto",
                    # fp16 only makes sense on GPU.
                    torch_dtype=torch.float16 if "cuda" in self.device else torch.float32,
                )
                self.model_name = model_name
                logger.info(f"Successfully loaded {model_name} on {self.device}")
                return True

            except Exception as e:
                # Best-effort fallback: log and try the next candidate.
                logger.warning(f"Failed to load {model_name}: {str(e)}")
                continue

        logger.error("All models failed to load")
        return False

    def generate_answer(self, question: str, context: Optional[str] = None) -> str:
        """Generate an answer for *question*, optionally grounded in *context*.

        Raises:
            Exception: re-raises whatever the tokenizer/model raised, after
            logging it, so the endpoint can map the failure to an HTTP error.
        """
        try:
            input_text = f"question: {question}"
            if context:
                input_text += f" context: {context[:2000]}"  # Limit context size

            inputs = self.tokenizer(
                input_text,
                return_tensors="pt",
                truncation=True,
                max_length=512,
            ).to(self.device)
            # NOTE(review): assumes device_map="auto" placed the model on
            # self.device — confirm on multi-GPU hosts.

            # Beam search is deterministic (do_sample defaults to False), so the
            # original `temperature=0.7` was a no-op that transformers warns
            # about; it is dropped here without changing outputs.
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=150,
                num_beams=3,
                early_stopping=True,
                repetition_penalty=2.5,
                no_repeat_ngram_size=3,
            )

            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        except Exception as e:
            logger.error(f"Generation failed: {str(e)}")
            raise


# Global service instance shared by the request handlers.
qa_service = QAService()
202
 
203
@app.on_event("startup")
async def startup_event():
    """Eagerly load a model at app startup so the first /qa call isn't slow."""
    # The endpoint can still retry initialization on demand, so a failure
    # here is logged rather than raised (the app stays up).
    if not qa_service.initialize():
        logger.error("QA service failed to initialize")
207
+
208
+
209
+
210
+
211
+
212
+
213
+
214
 
215
 
216
 
 
903
  from typing import Optional
904
 
905
  @app.post("/qa")
906
+ async def handle_qa(
907
  question: str = Form(...),
908
+ file: Optional[UploadFile] = File(None),
909
+ language: str = Form("fr")
910
  ):
911
+ """Handle QA requests with file upload support"""
912
  if not qa_service.model:
913
+ raise HTTPException(
914
+ 503,
915
+ detail={
916
+ "error": "Service unavailable",
917
+ "supported_models": MODEL_CHOICES,
918
+ "suggestion": "Try again later or contact support"
919
+ }
920
+ )
 
921
 
922
  try:
923
+ # Validate question
924
+ if not question.strip():
925
+ raise HTTPException(400, "Question cannot be empty")
926
+
927
+ # Process file if provided
928
  context = None
929
  if file:
930
+ try:
931
+ file_ext, content = await process_uploaded_file(file)
932
+ context = extract_text(content, file_ext)
933
+ context = re.sub(r'\s+', ' ', context).strip()[:2000] # Clean and limit
934
+ except HTTPException:
935
+ raise
936
+ except Exception as e:
937
+ logger.error(f"File processing failed: {str(e)}")
938
+ raise HTTPException(422, "File processing error")
939
+
940
+ # Generate answer
941
  try:
942
+ answer = qa_service.generate_answer(question, context)
 
 
 
 
 
 
943
 
944
  return {
945
  "question": question,
946
+ "answer": answer,
947
  "model": qa_service.model_name,
948
+ "context_used": context is not None,
949
+ "language": language
950
  }
951
 
952
  except Exception as e:
953
+ logger.error(f"Answer generation failed: {str(e)}")
954
  raise HTTPException(
955
+ 500,
956
  detail={
957
  "error": "Answer generation failed",
958
  "model": qa_service.model_name,
959
+ "suggestion": "Try simplifying your question or reducing document size"
 
960
  }
961
  )
962