Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -43,7 +43,9 @@ import base64
|
|
| 43 |
import warnings
|
| 44 |
from typing import Tuple, Optional
|
| 45 |
from pathlib import Path
|
| 46 |
-
|
|
|
|
|
|
|
| 47 |
# Third-party imports
|
| 48 |
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
|
| 49 |
from fastapi.middleware.cors import CORSMiddleware
|
|
@@ -727,6 +729,7 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
|
|
| 727 |
try:
|
| 728 |
# Check file type
|
| 729 |
filename = file.filename.lower()
|
|
|
|
| 730 |
|
| 731 |
# Process different file types
|
| 732 |
if filename.endswith(('.txt', '.md')):
|
|
@@ -746,10 +749,12 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
|
|
| 746 |
content = await file.read()
|
| 747 |
text = extract_text(content, 'pdf') # Your existing PDF extraction
|
| 748 |
else:
|
| 749 |
-
#
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
|
|
|
|
|
|
| 753 |
if not text.strip():
|
| 754 |
raise HTTPException(400, "No extractable text found")
|
| 755 |
|
|
@@ -769,7 +774,7 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
|
|
| 769 |
except HTTPException:
|
| 770 |
raise
|
| 771 |
except Exception as e:
|
| 772 |
-
logger.error(f"Summarization failed: {str(e)}")
|
| 773 |
raise HTTPException(500, "Document summarization failed")
|
| 774 |
|
| 775 |
@app.post("/qa")
|
|
|
|
| 43 |
import warnings
|
| 44 |
from typing import Tuple, Optional
|
| 45 |
from pathlib import Path
|
| 46 |
+
from docx import Document
|
| 47 |
+
from pptx import Presentation
|
| 48 |
+
import re
|
| 49 |
# Third-party imports
|
| 50 |
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
|
| 51 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 729 |
try:
|
| 730 |
# Check file type
|
| 731 |
filename = file.filename.lower()
|
| 732 |
+
text = ""
|
| 733 |
|
| 734 |
# Process different file types
|
| 735 |
if filename.endswith(('.txt', '.md')):
|
|
|
|
| 749 |
content = await file.read()
|
| 750 |
text = extract_text(content, 'pdf') # Your existing PDF extraction
|
| 751 |
else:
|
| 752 |
+
# For unsupported formats, try to read as plain text
|
| 753 |
+
try:
|
| 754 |
+
text = (await file.read()).decode('utf-8')
|
| 755 |
+
except UnicodeDecodeError:
|
| 756 |
+
raise HTTPException(400, "Unsupported file format")
|
| 757 |
+
|
| 758 |
if not text.strip():
|
| 759 |
raise HTTPException(400, "No extractable text found")
|
| 760 |
|
|
|
|
| 774 |
except HTTPException:
|
| 775 |
raise
|
| 776 |
except Exception as e:
|
| 777 |
+
logger.error(f"Summarization failed: {str(e)}", exc_info=True)
|
| 778 |
raise HTTPException(500, "Document summarization failed")
|
| 779 |
|
| 780 |
@app.post("/qa")
|