chenguittiMaroua committed on
Commit
c6f0f67
·
verified ·
1 Parent(s): 15f5c7f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +35 -11
main.py CHANGED
@@ -723,18 +723,26 @@ def generate_dynamic_visualization_code(df: pd.DataFrame, request: Visualization
723
 
724
 
725
 
 
 
 
 
 
 
 
 
 
 
726
  @app.post("/summarize")
727
  @limiter.limit("5/minute")
728
  async def summarize_document(request: Request, file: UploadFile = File(...)):
729
  try:
730
  # Validate file type
 
 
 
731
  filename = file.filename.lower()
732
- file_ext = None
733
-
734
- for ext in SUPPORTED_EXTENSIONS:
735
- if filename.endswith(ext):
736
- file_ext = ext
737
- break
738
 
739
  if not file_ext:
740
  supported_formats = ", ".join(SUPPORTED_EXTENSIONS.keys())
@@ -747,7 +755,10 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
747
  text = await extract_text_from_file(file, file_ext)
748
 
749
  if not text.strip():
750
- raise HTTPException(400, "The document appears to be empty or contains no extractable text")
 
 
 
751
 
752
  # Clean and chunk text
753
  text = clean_text(text)
@@ -787,10 +798,15 @@ async def extract_text_from_file(file: UploadFile, file_ext: str) -> str:
787
  try:
788
  return content.decode('utf-8')
789
  except UnicodeDecodeError:
790
- # ODT is a zip file, would need proper parsing in production
791
- raise HTTPException(400, "ODT file parsing requires additional libraries")
 
 
792
  except Exception as e:
793
- raise HTTPException(400, f"Failed to extract text from file: {str(e)}")
 
 
 
794
 
795
  def clean_text(text: str) -> str:
796
  """Clean and normalize text"""
@@ -803,10 +819,18 @@ async def generate_summary(text: str, chunk_size: int = 1000) -> str:
803
  summarizer = get_summarizer()
804
  summaries = []
805
  for chunk in chunks:
806
- summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
 
 
 
 
 
807
  summaries.append(summary)
808
  return " ".join(summaries)
809
 
 
 
 
810
  @app.post("/qa")
811
  @limiter.limit("5/minute")
812
  async def question_answering(
 
723
 
724
 
725
 
726
+ SUPPORTED_EXTENSIONS: Dict[str, str] = {
727
+ '.txt': 'text/plain',
728
+ '.md': 'text/markdown',
729
+ '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
730
+ '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
731
+ '.pdf': 'application/pdf',
732
+ '.rtf': 'application/rtf',
733
+ '.odt': 'application/vnd.oasis.opendocument.text'
734
+ }
735
+
736
  @app.post("/summarize")
737
  @limiter.limit("5/minute")
738
  async def summarize_document(request: Request, file: UploadFile = File(...)):
739
  try:
740
  # Validate file type
741
+ if not file.filename:
742
+ raise HTTPException(status_code=400, detail="No filename provided")
743
+
744
  filename = file.filename.lower()
745
+ file_ext = next((ext for ext in SUPPORTED_EXTENSIONS if filename.endswith(ext)), None)
 
 
 
 
 
746
 
747
  if not file_ext:
748
  supported_formats = ", ".join(SUPPORTED_EXTENSIONS.keys())
 
755
  text = await extract_text_from_file(file, file_ext)
756
 
757
  if not text.strip():
758
+ raise HTTPException(
759
+ status_code=400,
760
+ detail="The document appears to be empty or contains no extractable text"
761
+ )
762
 
763
  # Clean and chunk text
764
  text = clean_text(text)
 
798
  try:
799
  return content.decode('utf-8')
800
  except UnicodeDecodeError:
801
+ raise HTTPException(
802
+ status_code=400,
803
+ detail="ODT file parsing requires additional libraries"
804
+ )
805
  except Exception as e:
806
+ raise HTTPException(
807
+ status_code=400,
808
+ detail=f"Failed to extract text from file: {str(e)}"
809
+ )
810
 
811
  def clean_text(text: str) -> str:
812
  """Clean and normalize text"""
 
819
  summarizer = get_summarizer()
820
  summaries = []
821
  for chunk in chunks:
822
+ summary = summarizer(
823
+ chunk,
824
+ max_length=150,
825
+ min_length=50,
826
+ do_sample=False
827
+ )[0]["summary_text"]
828
  summaries.append(summary)
829
  return " ".join(summaries)
830
 
831
+
832
+
833
+
834
  @app.post("/qa")
835
  @limiter.limit("5/minute")
836
  async def question_answering(