Skip to content

Commit b814952

Browse files
authored
Create PDF-Text-Extractor.py
1 parent 81e739b commit b814952

1 file changed

Lines changed: 79 additions & 0 deletions

File tree

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import tkinter as tk
2+
from tkinter import filedialog, messagebox, scrolledtext
3+
import ttkbootstrap as ttk
4+
from PyPDF2 import PdfReader
5+
import os
6+
7+
class PDFTextExtractor:
    """Small Tk GUI that extracts text from PDF files into a text area.

    The window holds a row of three buttons (extract, save, clear) above a
    scrollable text area.  Extracted text can be reviewed in the text area
    and then written to a UTF-8 text file.
    """

    def __init__(self, root):
        """Build all widgets inside *root*.

        Args:
            root: Top-level window that hosts the widgets.  It is used as the
                Tk parent and must provide ``title()`` and ``geometry()``.
        """
        self.root = root
        self.root.title("PDF Text Extractor")
        self.root.geometry("800x600")

        # Buttons frame: the three action buttons sit side by side in a grid.
        button_frame = ttk.Frame(self.root)
        button_frame.pack(pady=10)

        self.extract_btn = ttk.Button(
            button_frame,
            text="Select PDF(s) and Extract Text",
            command=self.extract_text,
        )
        self.extract_btn.grid(row=0, column=0, padx=5)

        self.save_btn = ttk.Button(
            button_frame,
            text="Save Extracted Text",
            command=self.save_text,
        )
        self.save_btn.grid(row=0, column=1, padx=5)

        self.clear_btn = ttk.Button(
            button_frame,
            text="Clear Text",
            command=self.clear_text,
        )
        self.clear_btn.grid(row=0, column=2, padx=5)

        # Scrolled text area: fills the rest of the window and grows with it.
        self.text_area = scrolledtext.ScrolledText(
            self.root, wrap=tk.WORD, font=("Arial", 12)
        )
        self.text_area.pack(expand=True, fill=tk.BOTH, padx=10, pady=10)

    @staticmethod
    def _read_page_texts(file_path):
        """Return the extracted text of every page of *file_path*, in order.

        Pages for which extraction yields nothing contribute an empty string.
        Any error raised while opening or parsing the PDF propagates to the
        caller.
        """
        reader = PdfReader(file_path)
        return [page.extract_text() or "" for page in reader.pages]

    @staticmethod
    def _format_section(file_name, page_texts):
        """Return the display section for one file: a header plus its text.

        Pages are joined with a newline so that the last word of one page
        does not run into the first word of the next.  If no page produced
        any text, a bracketed placeholder is shown instead.
        """
        body = "\n".join(text for text in page_texts if text)
        if not body.strip():
            body = f"[No extractable text found in {file_name}]\n"
        return f"--- {file_name} ---\n{body}\n\n"

    def extract_text(self) -> None:
        """Ask for one or more PDFs and show their extracted text.

        The current contents of the text area are replaced by one section
        per selected file.  A file that cannot be processed does not abort
        the batch; a bracketed failure note is shown in its place.  Nothing
        happens if the file dialog is cancelled.
        """
        file_paths = filedialog.askopenfilenames(
            filetypes=[("PDF Files", "*.pdf")],
            title="Select PDF file(s)",
        )
        if not file_paths:
            return

        sections = []
        for file_path in file_paths:
            file_name = os.path.basename(file_path)
            try:
                page_texts = self._read_page_texts(file_path)
            except Exception as e:
                # Deliberately broad: this is the UI boundary, and one
                # unreadable file must not prevent the others from showing.
                sections.append(f"[Failed to extract {file_name}: {e}]\n\n")
            else:
                sections.append(self._format_section(file_name, page_texts))

        # Text widget indices are "line.column" strings; "1.0" is the start.
        self.text_area.delete("1.0", tk.END)
        self.text_area.insert(tk.END, "".join(sections))

    def save_text(self) -> None:
        """Write the text area's contents to a user-chosen UTF-8 text file.

        Leading and trailing whitespace is stripped before saving.  A
        warning is shown when there is nothing to save, nothing happens if
        the save dialog is cancelled, and an error dialog reports any
        failure to write the file.
        """
        text_content = self.text_area.get("1.0", tk.END).strip()
        if not text_content:
            messagebox.showwarning("Warning", "No text to save!")
            return

        save_path = filedialog.asksaveasfilename(
            defaultextension=".txt",
            filetypes=[("Text Files", "*.txt")],
            title="Save extracted text",
        )
        if not save_path:
            return

        try:
            with open(save_path, "w", encoding="utf-8") as f:
                f.write(text_content)
        except Exception as e:
            # Kept broad on purpose: any write failure is reported to the
            # user instead of escaping into the Tk callback machinery.
            messagebox.showerror("Error", f"Failed to save text:\n{str(e)}")
        else:
            # Only the file write is guarded above, so a problem showing this
            # dialog is never misreported as a failed save.
            messagebox.showinfo("Saved", "Text saved successfully!")

    def clear_text(self) -> None:
        """Remove all text from the text area."""
        self.text_area.delete("1.0", tk.END)
75+
76+
if __name__ == "__main__":
    # Create the themed top-level window first; the extractor attaches its
    # widgets to it.  "cosmo" is one ttkbootstrap theme; others such as
    # 'journal' or 'flatly' can be substituted.
    window = ttk.Window(themename="cosmo")
    PDFTextExtractor(window)
    # Hand control to the Tk event loop until the window is closed.
    window.mainloop()

0 commit comments

Comments
 (0)