|
4 | 4 | import re |
5 | 5 | import subprocess |
6 | 6 | import tempfile |
| 7 | +import time |
7 | 8 | import uuid |
8 | 9 | from io import BytesIO |
9 | 10 |
|
10 | 11 | from django.conf import settings |
11 | 12 | from django.core.files.base import ContentFile, File |
12 | 13 |
|
13 | 14 | from celery import chain, shared_task |
14 | | -from celery.exceptions import SoftTimeLimitExceeded |
15 | 15 | from pypdf import PdfReader |
16 | 16 |
|
17 | 17 | from documents.models import Document, DocumentError |
@@ -130,34 +130,49 @@ def checksum(self, document_id: int) -> int: |
130 | 130 |
|
def _ensure_unoserver_running() -> None:
    """Start a local ``unoserver`` daemon when ``unoping`` cannot reach one.

    Raises:
        MissingBinary: if the ``unoping`` or ``unoserver`` executable is not
            found on the PATH.
    """
    try:
        ping_result = subprocess.run(
            ["unoping"], capture_output=True, timeout=5, check=False
        )
    except FileNotFoundError as e:
        raise MissingBinary("unoping") from e

    if ping_result.returncode == 0:
        # A server already answered the ping; nothing to start.
        return

    # Server not running, start it as a daemon.
    # Here we want to use the system unoserver, as it needs access to
    # LibreOffice.
    try:
        subprocess.Popen(
            [
                "unoserver",
                "--daemon",
                "--conversion-timeout",
                "300",  # presumably seconds -- confirm against unoserver docs
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except FileNotFoundError as e:
        raise MissingBinary("unoserver") from e

    # Give the server time to start up and be ready.
    time.sleep(2)


@short_doctask
def convert_office_to_pdf(self, document_id: int) -> int:
    """Convert a document's original file to PDF and store it on ``document.pdf``.

    The original file's bytes are piped to ``unoconvert`` on stdin and the
    converted PDF is captured from its stdout, then saved under a random
    UUID-based file name. When ``settings.DEBUG`` is true, a local
    ``unoserver`` daemon is started first if none answers ``unoping``.

    Args:
        document_id: primary key of the ``Document`` to convert.

    Returns:
        The same ``document_id``, unchanged.

    Raises:
        MissingBinary: if a required executable (``unoconvert``, or in DEBUG
            mode ``unoping`` / ``unoserver``) is not found.
        DocumentProcessingError: if ``unoconvert`` exits with a non-zero
            status; the message carries the first 2000 bytes of its stderr.
    """
    document = Document.objects.get(pk=document_id)

    if settings.DEBUG:
        _ensure_unoserver_running()

    # Open explicitly so the underlying storage handle is closed once the
    # bytes have been read, instead of being left open by an implicit read().
    with document.original.open("rb") as source:
        payload = source.read()

    try:
        result = subprocess.run(
            ["unoconvert", "-", "-", "--convert-to", "pdf"],
            input=payload,
            capture_output=True,
            check=True,
        )
    except FileNotFoundError as e:
        raise MissingBinary("unoconvert") from e
    except subprocess.CalledProcessError as e:
        # stderr is captured as bytes (no text mode); decode it so the message
        # contains readable text rather than a b'...' repr.
        stderr_excerpt = (e.stderr or b"")[:2000].decode("utf-8", errors="replace")
        raise DocumentProcessingError(
            document, exc=e, message="unoconvert has failed: %s" % stderr_excerpt
        ) from e

    document.pdf.save(str(uuid.uuid4()) + ".pdf", ContentFile(result.stdout))

    return document_id
161 | 176 |
|
162 | 177 |
|
163 | 178 | @short_doctask |
|
0 commit comments