Skip to content
This repository was archived by the owner on Mar 27, 2026. It is now read-only.

Commit fb58d18

Browse files
authored
Merge pull request #1 from nelsonduarte/feature/converter-pdf
feat: adicionar ferramenta Converter (PDF para PNG/JPG/DOCX/TXT)
2 parents 812fecd + ba42062 commit fb58d18

4 files changed

Lines changed: 268 additions & 0 deletions

File tree

app/tools/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
from app.tools.marca_dagua import TabMarcaDagua
1010
from app.tools.info import TabInfo
1111
from app.tools.ocr import TabOCR
12+
from app.tools.converter import TabConverter
1213

1314
__all__ = [
1415
"TabDividir", "TabJuntar", "TabRotar", "TabExtrair", "TabReordenar",
1516
"TabComprimir", "TabEncriptar", "TabMarcaDagua", "TabInfo", "TabOCR",
17+
"TabConverter",
1618
]

app/tools/converter.py

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
"""PDFApps – TabConverter: converter PDF para imagens, DOCX ou TXT."""
2+
3+
import os
4+
5+
from PySide6.QtCore import Qt
6+
from PySide6.QtWidgets import (
7+
QGroupBox, QFormLayout, QComboBox, QLabel, QFileDialog,
8+
QMessageBox, QApplication,
9+
)
10+
from pypdf import PdfReader
11+
12+
from app.base import BasePage
13+
from app.utils import section, info_lbl, pick_folder
14+
from app.widgets import DropFileEdit
15+
16+
17+
class TabConverter(BasePage):
    """Tool page that converts a PDF to images (PNG/JPG), DOCX or plain text.

    The output format is selected in ``cmb_format``: indices 0 and 1 are the
    image formats (one file per page, written into a folder), index 2 is DOCX
    and index 3 is TXT (both written to a single output file).

    NOTE(review): ``self._form`` and ``self._status`` are not defined here;
    they are presumably provided by ``BasePage``, which presumably also
    triggers ``_run`` from its action button -- confirm against ``app.base``.
    """

    # DPI values matching, by index, the entries added to ``cmb_dpi``.
    _DPI_VALUES = [72, 150, 300]

    # ``str.translate`` table that drops characters which cannot be stored in
    # an XML 1.0 document (C0 controls other than tab/LF/CR, plus U+FFFE and
    # U+FFFF). python-docx raises ValueError when asked to write them.
    _XML_UNSAFE = dict.fromkeys(
        [*(c for c in range(0x20) if c not in (0x09, 0x0A, 0x0D)),
         0xFFFE, 0xFFFF])

    def __init__(self, status_fn):
        """Build the page widgets.

        Args:
            status_fn: Forwarded unchanged to ``BasePage.__init__``.
        """
        super().__init__("fa5s.exchange-alt", "Converter PDF",
                         "Converta PDF para imagens (PNG/JPG), Word (DOCX) ou texto simples (TXT).",
                         "Converter", status_fn)
        f = self._form

        # -- Source file --
        f.addWidget(section("Ficheiro de origem"))
        self.drop_in = DropFileEdit()
        # Replace the widget's default button handler with our own picker.
        self.drop_in.btn.clicked.disconnect()
        self.drop_in.btn.clicked.connect(self._pick_input)
        self.drop_in.path_changed.connect(self._load_input)
        self.lbl_info = info_lbl()
        f.addWidget(self.drop_in)
        f.addWidget(self.lbl_info)

        # -- Output format --
        grp_fmt = QGroupBox("Formato de saída")
        gf = QFormLayout(grp_fmt)
        gf.setLabelAlignment(Qt.AlignmentFlag.AlignRight)
        self.cmb_format = QComboBox()
        self.cmb_format.addItems([
            "PNG (imagens)",
            "JPG (imagens)",
            "DOCX (Word)",
            "TXT (texto simples)",
        ])
        self.cmb_format.currentIndexChanged.connect(self._on_format_changed)
        gf.addRow("Formato:", self.cmb_format)
        f.addWidget(grp_fmt)

        # -- Image options (visible for PNG/JPG only) --
        self._grp_dpi = QGroupBox("Opções de imagem")
        gd = QFormLayout(self._grp_dpi)
        gd.setLabelAlignment(Qt.AlignmentFlag.AlignRight)
        self.cmb_dpi = QComboBox()
        self.cmb_dpi.addItems([
            "72 DPI (ecrã)",
            "150 DPI (padrão)",
            "300 DPI (qualidade de impressão)",
        ])
        self.cmb_dpi.setCurrentIndex(1)
        gd.addRow("Resolução:", self.cmb_dpi)
        f.addWidget(self._grp_dpi)

        # -- Output folder (images) --
        # Keep a reference to the header so it can be shown/hidden together
        # with the folder field, the same way ``_section_file`` is handled.
        self._section_folder = section("Pasta de saída")
        f.addWidget(self._section_folder)
        self._drop_folder = DropFileEdit(
            placeholder="Pasta onde serão guardadas as imagens…")
        self._drop_folder.btn.clicked.disconnect()
        self._drop_folder.btn.clicked.connect(self._pick_folder)
        f.addWidget(self._drop_folder)

        # -- Output file (DOCX / TXT) --
        # Hidden initially because the default format (index 0) is an image.
        self._section_file = section("Ficheiro de saída")
        self._section_file.setVisible(False)
        f.addWidget(self._section_file)
        self._drop_file = DropFileEdit(save=True, default_name="convertido.docx")
        self._drop_file.setVisible(False)
        f.addWidget(self._drop_file)

        self.lbl_result = QLabel("")
        self.lbl_result.setStyleSheet(
            "font-weight:600; font-size:11pt; color:#059669; "
            "background:transparent; padding:10px 4px;")
        f.addWidget(self.lbl_result)
        f.addStretch()

    # ── helpers ───────────────────────────────────────────────────────────

    @staticmethod
    def _text_ext(fmt: int) -> str:
        """Return the output extension for a non-image format index."""
        return ".docx" if fmt == 2 else ".txt"

    def _import_fitz(self):
        """Import PyMuPDF lazily.

        Returns:
            The ``fitz`` module, or ``None`` after showing an error dialog
            when PyMuPDF is not installed.
        """
        try:
            import fitz
        except ImportError:
            QMessageBox.critical(self, "Dependência em falta",
                                 "Instala o PyMuPDF:\n pip install pymupdf")
            return None
        return fitz

    # ── UI callbacks ──────────────────────────────────────────────────────

    def _on_format_changed(self, index: int):
        """Show the widgets relevant to the selected format.

        Image formats show the DPI group and the output folder; DOCX/TXT show
        the output file field, which is pre-filled with the input path with
        its extension swapped.
        """
        is_image = index <= 1
        self._grp_dpi.setVisible(is_image)
        self._section_folder.setVisible(is_image)
        self._drop_folder.setVisible(is_image)
        self._section_file.setVisible(not is_image)
        self._drop_file.setVisible(not is_image)
        if is_image:
            return
        inp = self.drop_in.path()
        if inp:
            base = os.path.splitext(inp)[0]
            self._drop_file.set_path(base + self._text_ext(index))

    def _pick_input(self):
        """Ask the user for a PDF and load it as the input."""
        p, _ = QFileDialog.getOpenFileName(self, "Abrir PDF", "", "PDF (*.pdf)")
        if p:
            self._load_input(p)

    def _load_input(self, p: str):
        """Set ``p`` as the input file and refresh the dependent fields.

        Shows the page count and size in ``lbl_info`` (or the error text when
        the file cannot be read), fills the output folder if it is still
        empty and, for DOCX/TXT, proposes an output file next to the input.
        """
        # Signals are blocked because ``path_changed`` is connected to this
        # very method; set_path() would otherwise re-enter it.
        self.drop_in.blockSignals(True)
        self.drop_in.set_path(p)
        self.drop_in.blockSignals(False)
        try:
            # getsize() is inside the try so that a missing or unreadable
            # path is reported in the label instead of raising out of a slot.
            size = os.path.getsize(p)
            r = PdfReader(p)
            self.lbl_info.setText(f" {len(r.pages)} páginas · {size / 1024:.1f} KB")
        except Exception as e:
            self.lbl_info.setText(f" Erro: {e}")
        base = os.path.splitext(p)[0]
        if not self._drop_folder.path():
            self._drop_folder.blockSignals(True)
            self._drop_folder.set_path(os.path.dirname(p))
            self._drop_folder.blockSignals(False)
        fmt = self.cmb_format.currentIndex()
        if fmt >= 2:
            self._drop_file.set_path(base + self._text_ext(fmt))

    def _pick_folder(self):
        """Ask the user for the image output folder."""
        d = pick_folder(self)
        if d:
            self._drop_folder.blockSignals(True)
            self._drop_folder.set_path(d)
            self._drop_folder.blockSignals(False)

    def auto_load(self, path: str):
        """Load ``path`` as the input, but only if no input is selected yet."""
        if path and not self.drop_in.path():
            self._load_input(path)

    # ── conversion logic ──────────────────────────────────────────────────

    def _run(self):
        """Validate the input PDF and dispatch to the selected converter."""
        pdf_path = self.drop_in.path()
        if not pdf_path or not os.path.isfile(pdf_path):
            QMessageBox.warning(self, "Aviso", "Seleciona um PDF válido.")
            return

        fmt = self.cmb_format.currentIndex()
        self.lbl_result.setText("")

        if fmt <= 1:
            self._convert_images(pdf_path, fmt)
        elif fmt == 2:
            self._convert_docx(pdf_path)
        else:
            self._convert_txt(pdf_path)

    def _convert_images(self, pdf_path: str, fmt: int):
        """Render every page to ``pagina_NNN.png`` / ``.jpg`` in the output folder.

        Args:
            pdf_path: Path of the PDF to render.
            fmt: Format index; 0 writes PNG, anything else writes JPG.
        """
        out_dir = self._drop_folder.path()
        if not out_dir:
            QMessageBox.warning(self, "Aviso", "Escolhe a pasta de saída.")
            return
        fitz = self._import_fitz()
        if fitz is None:
            return
        ext = "png" if fmt == 0 else "jpg"
        dpi = self._DPI_VALUES[self.cmb_dpi.currentIndex()]

        # Pillow is optional and only used for JPEG (to control quality);
        # resolve it once here instead of on every page.
        Image = None
        if ext == "jpg":
            try:
                from PIL import Image
            except ImportError:
                Image = None

        self._status(f"A converter para {ext.upper()} a {dpi} DPI…")
        QApplication.processEvents()
        try:
            os.makedirs(out_dir, exist_ok=True)
            # PDF user space is 72 units per inch, hence the dpi/72 zoom.
            matrix = fitz.Matrix(dpi / 72, dpi / 72)
            # The context manager closes the document even if a page fails.
            with fitz.open(pdf_path) as doc:
                total = doc.page_count
                for i, page in enumerate(doc):
                    pix = page.get_pixmap(matrix=matrix)
                    if pix.alpha:
                        pix = fitz.Pixmap(pix, 0)  # drop the alpha channel
                    if pix.n == 4:
                        # Four components without alpha: convert to RGB.
                        pix = fitz.Pixmap(fitz.csRGB, pix)
                    out_file = os.path.join(out_dir, f"pagina_{i + 1:03d}.{ext}")
                    if ext == "png" or Image is None:
                        pix.save(out_file)
                    else:
                        mode = "L" if pix.n == 1 else "RGB"
                        img = Image.frombytes(mode, (pix.width, pix.height), pix.samples)
                        img.save(out_file, "JPEG", quality=95)
                    self._status(f"A converter… {i + 1}/{total}")
                    # Keep the UI responsive while converting on the GUI thread.
                    QApplication.processEvents()
        except Exception as e:
            QMessageBox.critical(self, "Erro", str(e))
            return
        self.lbl_result.setText(f" {total} imagens guardadas em {out_dir}")
        self._status(f"✔ {total} imagens exportadas")
        QMessageBox.information(self, "Concluído",
                                f"{total} imagens guardadas em:\n{out_dir}")

    def _convert_docx(self, pdf_path: str):
        """Write the text of every page to a DOCX file, one page per section.

        Only the extracted text is converted: each text line becomes a
        paragraph and a page break is inserted between PDF pages.
        """
        out_path = self._drop_file.path()
        if not out_path:
            QMessageBox.warning(self, "Aviso", "Escolhe o ficheiro de saída.")
            return
        self._status("A converter para DOCX…")
        QApplication.processEvents()
        fitz = self._import_fitz()
        if fitz is None:
            return
        try:
            from docx import Document
        except ImportError:
            QMessageBox.critical(self, "Dependência em falta",
                                 "Instala o python-docx:\n pip install python-docx")
            return
        try:
            docx_doc = Document()
            with fitz.open(pdf_path) as doc:
                for i, page in enumerate(doc):
                    if i > 0:
                        docx_doc.add_page_break()
                    # Extracted PDF text may contain control characters that
                    # XML cannot hold; strip them so the page is not lost to
                    # a ValueError from python-docx.
                    text = page.get_text().translate(self._XML_UNSAFE)
                    for paragraph in text.split('\n'):
                        docx_doc.add_paragraph(paragraph)
            docx_doc.save(out_path)
        except Exception as e:
            QMessageBox.critical(self, "Erro", str(e))
            return
        self.lbl_result.setText(f" Guardado → {os.path.basename(out_path)}")
        self._status(f"✔ DOCX guardado → {out_path}")
        QMessageBox.information(self, "Concluído",
                                f"DOCX guardado em:\n{out_path}")

    def _convert_txt(self, pdf_path: str):
        """Write the text of every page to a UTF-8 text file.

        Pages after the first are preceded by a ``--- Página N ---`` marker.
        """
        out_path = self._drop_file.path()
        if not out_path:
            QMessageBox.warning(self, "Aviso", "Escolhe o ficheiro de saída.")
            return
        self._status("A converter para TXT…")
        QApplication.processEvents()
        fitz = self._import_fitz()
        if fitz is None:
            return
        try:
            # The PDF is opened first so that an unreadable input does not
            # create or truncate the output file; both are closed on any error.
            with fitz.open(pdf_path) as doc, \
                    open(out_path, 'w', encoding='utf-8') as f:
                for i, page in enumerate(doc):
                    if i > 0:
                        f.write(f'\n\n--- Página {i + 1} ---\n\n')
                    f.write(page.get_text())
        except Exception as e:
            QMessageBox.critical(self, "Erro", str(e))
            return
        self.lbl_result.setText(f" Guardado → {os.path.basename(out_path)}")
        self._status(f"✔ TXT guardado → {out_path}")
        QMessageBox.information(self, "Concluído",
                                f"TXT guardado em:\n{out_path}")

app/window.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from app.tools.encriptar import TabEncriptar
2727
from app.tools.marca_dagua import TabMarcaDagua
2828
from app.tools.ocr import TabOCR
29+
from app.tools.converter import TabConverter
2930
from app.editor.tab import TabEditar
3031
from app.tools.info import TabInfo
3132

@@ -40,6 +41,7 @@
4041
("Encriptar", "fa5s.lock", TabEncriptar),
4142
("Marca d'água", "fa5s.stamp", TabMarcaDagua),
4243
("OCR", "fa5s.search", TabOCR),
44+
("Converter", "fa5s.exchange-alt", TabConverter),
4345
("Editar", "fa5s.edit", TabEditar),
4446
("Informação", "fa5s.info-circle", TabInfo),
4547
]

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ qtawesome>=1.4.1
44
pillow>=12.1.1
55
pymupdf>=1.27.2
66
pytesseract>=0.3.13
7+
python-docx>=1.2.0
78
pyinstaller>=6.19.0

0 commit comments

Comments
 (0)