diff --git a/app/converters/document.py b/app/converters/document.py index ec5557e..3fbb1e3 100644 --- a/app/converters/document.py +++ b/app/converters/document.py @@ -10,14 +10,45 @@ def _deny_url_fetcher(url: str, **kwargs) -> None: # --------------------------------------------------------------------------- -# DOCX → PDF +# DOCX → PDF (mammoth → HTML → WeasyPrint → PDF) # --------------------------------------------------------------------------- +# Pure-Python pipeline: mammoth extracts the DOCX body as HTML with images +# inlined as data: URIs, WeasyPrint renders it to PDF. SSRF-safe — the same +# _deny_url_fetcher used by md→pdf blocks any external URL the converted +# HTML might reference. Best-effort: footnotes, headers/footers and embedded +# OLE objects get simplified by mammoth; tables, images, hyperlinks and +# basic paragraph styles survive intact. @register(("docx", "pdf")) class DocxToPdfConverter(BaseConverter): def convert(self, input_path: Path, output_path: Path, **kwargs) -> Path: - from docx2pdf import convert # type: ignore + import mammoth + import weasyprint + + with open(input_path, "rb") as f: + result = mammoth.convert_to_html(f) + html_body = result.value + had_warnings = any(m.type == "warning" for m in result.messages) + + notice = ( + ( + '
' + "Some elements were simplified during conversion " + "(footnotes, headers/footers, or embedded objects)." + "
" + ) + if had_warnings + else "" + ) - convert(str(input_path), str(output_path)) + full_html = ( + "" + "" + f"{notice}{html_body}" + ) + weasyprint.HTML(string=full_html, url_fetcher=_deny_url_fetcher).write_pdf(str(output_path)) return output_path diff --git a/app/static/js/app.js b/app/static/js/app.js index 6dd04a5..c68affa 100644 --- a/app/static/js/app.js +++ b/app/static/js/app.js @@ -120,6 +120,17 @@ function setMode(mode) { compressMode = 'quality'; } + // Drop-zone help text differs per mode: convert covers all source formats, + // compress is limited to JPG/PNG/WebP/TIFF + MP4/AVI/MOV/MKV/WebM. The + // server rejects mismatches anyway, but showing the right list up-front + // keeps users from uploading e.g. an MP3 only to see a 422. + const supConv = document.getElementById('supported-convert'); + const supComp = document.getElementById('supported-compress'); + if (supConv && supComp) { + supConv.classList.toggle('hidden', mode !== 'convert'); + supComp.classList.toggle('hidden', mode !== 'compress'); + } + updateConvertOptionsVisibility(); renderFileList(); updateQualityVisibility(); diff --git a/app/templates/index.html b/app/templates/index.html index 0eceaf9..e282e94 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -47,11 +47,15 @@

{{ _('Convert & Compress Files') }}

{{ _('Drag & drop your files here') }}

{{ _('or click to browse (multi-file supported)') }}

-

+

{{ _('Supported: HEIC · JPG · PNG · WebP · BMP · TIFF · GIF') }}
{{ _('DOCX · PDF · TXT · MD · XLSX · CSV · JSON') }}
{{ _('MP4 · MOV · AVI · MKV · WebM · MP3 · WAV · FLAC · OGG · M4A') }}

+