Spaces:

chenguittiMaroua
/

asm-app

Sleeping

chenguittiMaroua committed on Apr 14

Commit

66d89ea

verified ·

1 Parent(s): 3388479

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -43,7 +43,9 @@ import base64
 import warnings
 from typing import Tuple, Optional
 from pathlib import Path
 # Third-party imports
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
@@ -727,6 +729,7 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
     try:
         # Check file type
         filename = file.filename.lower()
         # Process different file types
         if filename.endswith(('.txt', '.md')):
@@ -746,10 +749,12 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
             content = await file.read()
             text = extract_text(content, 'pdf')  # Your existing PDF extraction
         else:
-            # Fallback to textract for other formats (rtf, etc.)
-            content = await file.read()
-            text = textract.process(content).decode('utf-8')
         if not text.strip():
             raise HTTPException(400, "No extractable text found")
@@ -769,7 +774,7 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
     except HTTPException:
         raise
     except Exception as e:
-        logger.error(f"Summarization failed: {str(e)}")
         raise HTTPException(500, "Document summarization failed")
 @app.post("/qa")

 import warnings
 from typing import Tuple, Optional
 from pathlib import Path
+from docx import Document
+from pptx import Presentation
+import re
 # Third-party imports
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
     try:
         # Check file type
         filename = file.filename.lower()
+        text = ""
         # Process different file types
         if filename.endswith(('.txt', '.md')):
             content = await file.read()
             text = extract_text(content, 'pdf')  # Your existing PDF extraction
         else:
+            # For unsupported formats, try to read as plain text
+            try:
+                text = (await file.read()).decode('utf-8')
+            except UnicodeDecodeError:
+                raise HTTPException(400, "Unsupported file format")
         if not text.strip():
             raise HTTPException(400, "No extractable text found")
     except HTTPException:
         raise
     except Exception as e:
+        logger.error(f"Summarization failed: {str(e)}", exc_info=True)
         raise HTTPException(500, "Document summarization failed")
 @app.post("/qa")