Spaces:
Sleeping
Sleeping
Update lib/read_pdf.py
Browse files
- lib/read_pdf.py +2 -2
lib/read_pdf.py
CHANGED
|
@@ -54,7 +54,7 @@ def extract_and_format_paragraphs(pdf_path):
|
|
| 54 |
if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
|
| 55 |
# This line is a continuation of the previous one
|
| 56 |
if paragraph_lines[-1][-1] == "-":
|
| 57 |
-
paragraph_lines[-1] = paragraph_lines[-1][:-1]
|
| 58 |
paragraph_lines[-1] += line.strip()
|
| 59 |
|
| 60 |
paragraph_lines[-1] += ' ' + line.strip()
|
|
@@ -104,7 +104,7 @@ def extract_and_format_paragraphs(pdf_path):
|
|
| 104 |
width = page.width
|
| 105 |
height = page.height
|
| 106 |
|
| 107 |
-
header_height = height * 0.
|
| 108 |
#footer_height = height * 0.1 # Adjust this value based on your PDF
|
| 109 |
|
| 110 |
left_bbox = (0, header_height, width / 2, height) # Left column
|
|
|
|
| 54 |
if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
|
| 55 |
# This line is a continuation of the previous one
|
| 56 |
if paragraph_lines[-1][-1] == "-":
|
| 57 |
+
#paragraph_lines[-1] = paragraph_lines[-1][:-1]
|
| 58 |
paragraph_lines[-1] += line.strip()
|
| 59 |
|
| 60 |
paragraph_lines[-1] += ' ' + line.strip()
|
|
|
|
| 104 |
width = page.width
|
| 105 |
height = page.height
|
| 106 |
|
| 107 |
+
header_height = height * 0.08 # Adjust this value based on your PDF
|
| 108 |
#footer_height = height * 0.1 # Adjust this value based on your PDF
|
| 109 |
|
| 110 |
left_bbox = (0, header_height, width / 2, height) # Left column
|