-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_pdf.py
More file actions
23 lines (21 loc) · 803 Bytes
/
extract_pdf.py
File metadata and controls
23 lines (21 loc) · 803 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import PyPDF2
import sys
def extract_text(pdf_path, output_path):
    """Extract the text of every page of a PDF into a plain-text file.

    Each page's text is written after a ``--- Page N ---`` header (pages are
    numbered from 1) and followed by a blank line. On success a confirmation
    message is printed. Any failure is reported by printing ``Error: ...``
    rather than raising, so the function always returns ``None``.

    Args:
        pdf_path: Path of the PDF file to read.
        output_path: Path of the text file to create or overwrite.
    """
    try:
        with open(pdf_path, 'rb') as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            # Explicit UTF-8: the platform default encoding (e.g. cp1252 on
            # Windows) cannot represent every character a PDF may contain and
            # would abort the extraction midway with UnicodeEncodeError.
            with open(output_path, 'w', encoding='utf-8') as out:
                for page_number, page in enumerate(reader.pages, start=1):
                    # Guard against a page yielding no text object at all, so
                    # one blank or image-only page cannot fail the whole run.
                    text = page.extract_text() or ""
                    out.write(f"--- Page {page_number} ---\n")
                    out.write(text)
                    out.write("\n\n")
        print(f"Successfully extracted text to {output_path}")
    except Exception as e:
        # Deliberate catch-all: this is the script's reporting boundary, and
        # callers rely on failures being printed instead of propagated.
        print(f"Error: {e}")
if __name__ == "__main__":
    # Script entry point: expects the input PDF path and the output text path.
    if len(sys.argv) < 3:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # so a wrong invocation is detectable by shells and calling scripts.
        sys.exit("Usage: python extract_pdf.py <pdf_path> <output_path>")
    extract_text(sys.argv[1], sys.argv[2])