From 7524167dbb671719578a9c00d8988c6d6e67c553 Mon Sep 17 00:00:00 2001 From: 0xC4 Date: Tue, 23 Dec 2025 17:20:32 +0100 Subject: [PATCH 1/7] unovonv is deprecated, use unoserver (Fixes #332) (#353) --- .github/workflows/python-tests.yml | 9 ++-- README.md | 10 +++-- documents/tasks.py | 66 +++++++++++++++++------------- documents/tests/celery_test.py | 30 +------------- pyproject.toml | 4 +- 5 files changed, 55 insertions(+), 64 deletions(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 03befb14..c8e805d4 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -46,7 +46,10 @@ jobs: run: sudo apt update - name: Install apt packages - run: sudo apt install -y graphicsmagick mupdf-tools ghostscript unoconv python3-dev + run: sudo apt install -y graphicsmagick mupdf-tools ghostscript libreoffice pipx python3-dev + + - name: Install unoserver + run: pipx install unoserver --system-site-packages - name: Install the project run: uv sync --dev @@ -64,10 +67,10 @@ jobs: run: uv run ./manage.py collectstatic --noinput -v 0 - name: pytest with SQLite - run: uv run pytest -k "not unoconv" + run: uv run pytest - name: pytest with PostgreSQL - run: uv run pytest -k "not unoconv" -m postgresql + run: uv run pytest -m postgresql env: DB_URL: postgres://postgres:postgres@localhost:5432/postgres diff --git a/README.md b/README.md index 2a0f106c..0313acf5 100644 --- a/README.md +++ b/README.md @@ -32,13 +32,17 @@ First, install uv and system dependencies: curl -LsSf https://astral.sh/uv/install.sh | sh # Ubuntu -sudo apt-get install unoconv python3-dev ruby libtiff5-dev libjpeg-dev zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python-tk mupdf-tools redis-server +sudo apt-get install libreoffice pipx python3-dev ruby libtiff5-dev libjpeg-dev zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python-tk mupdf-tools redis-server +# unoserver needs access to LibreOffice's 'uno' library from system packages +pipx install unoserver --system-site-packages sudo systemctl enable --now redis-server # Fedora -sudo dnf install unoconv python-devel ruby mupdf redis +sudo dnf install libreoffice pipx python-devel ruby mupdf redis +pipx install unoserver --system-site-packages sudo systemctl enable --now redis # Arch linux -sudo pacman -S unoconv ruby python mupdf-tools redis +sudo pacman -S libreoffice python-pipx ruby python mupdf-tools redis +pipx install unoserver --system-site-packages sudo systemctl enable --now redis ``` diff --git a/documents/tasks.py b/documents/tasks.py index 6178fb70..396e493b 100644 --- a/documents/tasks.py +++ b/documents/tasks.py @@ -4,6 +4,7 @@ import re import subprocess import tempfile +import time import uuid from io import BytesIO @@ -11,7 +12,6 @@ from django.core.files.base import ContentFile, File from celery import chain, shared_task -from celery.exceptions import SoftTimeLimitExceeded from pypdf import PdfReader from documents.models import Document, DocumentError @@ -130,34 +130,44 @@ def checksum(self, document_id: int) -> int: @short_doctask def convert_office_to_pdf(self, document_id: int) -> int: + document = Document.objects.get(pk=document_id) + try: - document = Document.objects.get(pk=document_id) - - with file_as_local( - document.original, prefix="dochub_unoconv_input_" - ) as tmpfile: - try: - sub = subprocess.check_output( - ["unoconv", "-f", "pdf", "--stdout", tmpfile.name] - ) - except OSError as e: - raise MissingBinary("unoconv") from e - except subprocess.CalledProcessError as e: - raise DocumentProcessingError( - document, exc=e, message='"unoconv" has failed: %s' % e.output[:800] - ) from e - - document.pdf.save(str(uuid.uuid4()) + ".pdf", ContentFile(sub)) - - return document_id - - except SoftTimeLimitExceeded as e: - # If we timeouted, kill the faulty openoffice daemon - # it will respawn at the next unoconv invocation - os.system("killall soffice.bin") - # Still raise the exception so the pipeline for this - # document is still stopped - raise e + # Check if unoserver is running + ping_result = subprocess.run( + ["unoping"], capture_output=True, timeout=5, check=False + ) + + if ping_result.returncode != 0: + # Server not running, start it as a daemon + # Note: --daemon causes the process to fork, so we use Popen and don't wait + subprocess.Popen( + ["unoserver", "--daemon", "--conversion-timeout", "300"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + # Give the server time to start up and be ready + time.sleep(2) + + try: + result = subprocess.run( + ["unoconvert", "-", "-", "--convert-to", "pdf"], + input=document.original.read(), + capture_output=True, + check=True, + ) + sub = result.stdout + except subprocess.CalledProcessError as e: + raise DocumentProcessingError( + document, exc=e, message='"unoconvert" has failed: %s' % e.output[:800] + ) from e + + except FileNotFoundError as e: + raise MissingBinary("unoserver") from e + + document.pdf.save(str(uuid.uuid4()) + ".pdf", ContentFile(sub)) + + return document_id @short_doctask diff --git a/documents/tests/celery_test.py b/documents/tests/celery_test.py index 406f2a64..5e02789d 100644 --- a/documents/tests/celery_test.py +++ b/documents/tests/celery_test.py @@ -1,6 +1,3 @@ -import signal -from subprocess import call - from django.core.files import File import celery @@ -14,27 +11,6 @@ pytestmark = [pytest.mark.django_db, pytest.mark.celery] -class Alarm(Exception): - pass - - -def alarm_handler(signum, frame): - raise Alarm - - -def start_unoconv(): - signal.signal(signal.SIGALRM, alarm_handler) - signal.alarm(1) - try: - call(["unoconv", "--listener"]) # workaround for a shitty unoconv - # Error: Unable to connect or start own listener. Aborting. - # Setting a timeout because if a listener exists alreay it hangs... - except Alarm: - pass - - signal.alarm(0) # cancel alarm - - def create_doc(name, ext): try: user = User.objects.get(netid="test_user") @@ -79,8 +55,8 @@ def test_send_duplicate(): assert Document.objects.filter(id=doc.id).count() == 0 -# TODO : mock unoconv and provide a fake pdf instead -@pytest.mark.unoconv +# TODO : mock unoserver and provide a fake pdf instead +@pytest.mark.unoserver @pytest.mark.slow def test_send_office(): doc = create_doc("My office doc", ".docx") @@ -89,8 +65,6 @@ def test_send_office(): f = File(fd) doc.original.save("silly-unique-deadbeef-file.docx", f) - start_unoconv() - result = process_document.delay(doc.id) assert result.status == celery.states.SUCCESS, result.traceback diff --git a/pyproject.toml b/pyproject.toml index 29313cd5..4b348ce0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,13 +62,13 @@ dev = [ ] [tool.pytest.ini_options] -norecursedirs = "ve ve3 static media .git node_modules" +norecursedirs = ".venv ve ve3 static media .git node_modules" DJANGO_SETTINGS_MODULE="www.test_settings" addopts = "--reuse-db" markers = """ slow: marks tests as slow (deselect with '-m "not slow"') network: marks tests using the network (deselect with '-m "not network"') - unoconv: uses unoconv (underterministic) + unoserver: uses unoserver for office document conversion webtest: http queries against localhost celery: uses celery tasks postgresql: needs a postgresql database to run From 0a03e89c3c1a9ce80ec574ad93c5652eecdbfbd6 Mon Sep 17 00:00:00 2001 From: 0xC4 Date: Tue, 23 Dec 2025 22:26:14 +0100 Subject: [PATCH 2/7] unovonv is deprecated, use unoserver (Fixes #332) (#353) --- documents/tasks.py | 49 ++++++++++++---------- documents/templates/documents/viewer.html | 31 +++++++------- documents/tests/celery_test.py | 51 ++++++++++++++++++++++- pyproject.toml | 4 ++ uv.lock | 11 +++++ 5 files changed, 107 insertions(+), 39 deletions(-) diff --git a/documents/tasks.py b/documents/tasks.py index 396e493b..a3348bf4 100644 --- a/documents/tasks.py +++ b/documents/tasks.py @@ -132,7 +132,7 @@ def checksum(self, document_id: int) -> int: def convert_office_to_pdf(self, document_id: int) -> int: document = Document.objects.get(pk=document_id) - try: + if settings.DEBUG: # Check if unoserver is running ping_result = subprocess.run( ["unoping"], capture_output=True, timeout=5, check=False @@ -140,30 +140,35 @@ def convert_office_to_pdf(self, document_id: int) -> int: if ping_result.returncode != 0: # Server not running, start it as a daemon - # Note: --daemon causes the process to fork, so we use Popen and don't wait - subprocess.Popen( - ["unoserver", "--daemon", "--conversion-timeout", "300"], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) + # Here we want to use the system unoserver, as it needs access to LibreOffice + try: + subprocess.Popen( + [ + f"{os.environ['HOME']}/.local/bin/unoserver", + "--daemon", + "--conversion-timeout", + "300", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + except FileNotFoundError as e: + raise MissingBinary("unoserver") from e # Give the server time to start up and be ready time.sleep(2) - try: - result = subprocess.run( - ["unoconvert", "-", "-", "--convert-to", "pdf"], - input=document.original.read(), - capture_output=True, - check=True, - ) - sub = result.stdout - except subprocess.CalledProcessError as e: - raise DocumentProcessingError( - document, exc=e, message='"unoconvert" has failed: %s' % e.output[:800] - ) from e - - except FileNotFoundError as e: - raise MissingBinary("unoserver") from e + try: + result = subprocess.run( + ["unoconvert", "-", "-", "--convert-to", "pdf"], + input=document.original.read(), + capture_output=True, + check=True, + ) + sub = result.stdout + except subprocess.CalledProcessError as e: + raise DocumentProcessingError( + document, exc=e, message='"unoconvert" has failed: %s' % e.stderr[:800] + ) from e document.pdf.save(str(uuid.uuid4()) + ".pdf", ContentFile(sub)) diff --git a/documents/templates/documents/viewer.html b/documents/templates/documents/viewer.html index 94c8f206..d64319a2 100644 --- a/documents/templates/documents/viewer.html +++ b/documents/templates/documents/viewer.html @@ -36,7 +36,7 @@

- {% if document.state == "DONE" %} + {% if document.state == "DONE" and not document.is_unconvertible %} {% endblock header %} {% block content %} - {% if document.state == "DONE" %} + {% if document.is_unconvertible %} + + {% elif document.state == "DONE" %}
@@ -165,20 +172,12 @@

{% elif document.state == "ERROR" %} - {% if document.is_unconvertible %} - - {% else %} - - {% endif %} + + {% else %}