22#
33# SPDX-License-Identifier: Apache-2.0
44
5+ import mimetypes
56import os
67import shutil
78import subprocess
@@ -109,6 +110,26 @@ class LibreOfficeFileConverter:
109110 """A non-exhaustive mapping of supported conversion types by this component.
110111 See https://help.libreoffice.org/latest/en-GB/text/shared/guide/convertfilters.html for more information."""
111112
# Curated mapping from output file extension (without the leading dot) to its
# MIME type. `_resolve_mime_type` consults it only when `mimetypes.guess_type`
# cannot determine a type from the output path.
MIME_TYPE_FALLBACKS: ClassVar[dict[str, str]] = {
    # Text documents
    "pdf": "application/pdf",
    "doc": "application/msword",
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "odt": "application/vnd.oasis.opendocument.text",
    "rtf": "application/rtf",
    "txt": "text/plain",
    "html": "text/html",
    # Spreadsheets
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "xls": "application/vnd.ms-excel",
    "ods": "application/vnd.oasis.opendocument.spreadsheet",
    "csv": "text/csv",
    # Presentations
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "ppt": "application/vnd.ms-powerpoint",
    "odp": "application/vnd.oasis.opendocument.presentation",
    # E-books and images
    "epub": "application/epub+zip",
    "png": "image/png",
    "jpg": "image/jpeg",
}

132+
112133 def __init__ (
113134 self ,
114135 output_file_type : OUTPUT_FILE_TYPE | None = None ,
@@ -224,6 +245,12 @@ def _validate_args(self, output_file_type: str, input_file_type: str | None = No
224245 )
225246 raise ValueError (msg )
226247
248+ def _resolve_mime_type (self , output_path : Path , output_file_type : str ) -> str | None :
249+ mime_type , _ = mimetypes .guess_type (str (output_path ))
250+ if mime_type is None :
251+ return self .MIME_TYPE_FALLBACKS .get (output_file_type )
252+ return mime_type
253+
227254 @component .output_types (output = list [ByteStream ])
228255 def run (
229256 self ,
@@ -269,14 +296,24 @@ def run(
269296 output_path , args = self ._get_conversion_args (tmp_path , tmpdir , resolved_output_file_type )
270297
271298 subprocess .run (args , check = True ) # noqa: S603 - ruff doesn't know the arguments have been validated
272- outputs .append (ByteStream (data = output_path .read_bytes ()))
299+ outputs .append (
300+ ByteStream (
301+ data = output_path .read_bytes (),
302+ mime_type = self ._resolve_mime_type (output_path , resolved_output_file_type ),
303+ )
304+ )
273305 continue
274306
275307 self ._validate_args (resolved_output_file_type , str (source ).split ("." )[- 1 ])
276308 output_path , args = self ._get_conversion_args (source , tmpdir , resolved_output_file_type )
277309
278310 subprocess .run (args , check = True ) # noqa: S603
279- outputs .append (ByteStream (data = output_path .read_bytes ()))
311+ outputs .append (
312+ ByteStream (
313+ data = output_path .read_bytes (),
314+ mime_type = self ._resolve_mime_type (output_path , resolved_output_file_type ),
315+ )
316+ )
280317
281318 return {"output" : outputs }
282319
@@ -329,7 +366,12 @@ async def run_async(
329366 process = await create_subprocess_exec (* args )
330367 # Wait for process to complete as only one instance of soffice can occur at once
331368 await process .wait ()
332- outputs .append (ByteStream (data = output_path .read_bytes ()))
369+ outputs .append (
370+ ByteStream (
371+ data = output_path .read_bytes (),
372+ mime_type = self ._resolve_mime_type (output_path , resolved_output_file_type ),
373+ )
374+ )
333375 continue
334376
335377 self ._validate_args (resolved_output_file_type , str (source ).split ("." )[- 1 ])
@@ -339,6 +381,11 @@ async def run_async(
339381 # Wait for process to complete as only one instance of soffice can occur at once
340382 await process .wait ()
341383
342- outputs .append (ByteStream (data = output_path .read_bytes ()))
384+ outputs .append (
385+ ByteStream (
386+ data = output_path .read_bytes (),
387+ mime_type = self ._resolve_mime_type (output_path , resolved_output_file_type ),
388+ )
389+ )
343390
344391 return {"output" : outputs }
0 commit comments